diff options
323 files changed, 7697 insertions, 2403 deletions
diff --git a/configs/default b/configs/default index 8b4557d26a8..ec9670dd634 100644 --- a/configs/default +++ b/configs/default @@ -98,7 +98,7 @@ EGL_DRIVERS_DIRS = demo # Gallium directories and GALLIUM_DIRS = auxiliary drivers state_trackers GALLIUM_AUXILIARIES = $(TOP)/src/gallium/auxiliary/libgallium.a -GALLIUM_DRIVERS_DIRS = softpipe failover svga i915 i965 trace identity +GALLIUM_DRIVERS_DIRS = softpipe failover svga i915 i965 r300 trace identity GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVERS_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) GALLIUM_WINSYS_DIRS = xlib egl_xlib GALLIUM_WINSYS_DRM_DIRS = diff --git a/configs/linux-debug b/configs/linux-debug index 8b0b3126db6..01763b1a30d 100644 --- a/configs/linux-debug +++ b/configs/linux-debug @@ -5,5 +5,5 @@ include $(TOP)/configs/linux CONFIG_NAME = linux-debug OPT_FLAGS = -g -CFLAGS += -ansi -pedantic +CFLAGS += -pedantic DEFINES += -DDEBUG -DDEBUG_MATH diff --git a/configure.ac b/configure.ac index a5e181dbf44..6ae526d5e1e 100644 --- a/configure.ac +++ b/configure.ac @@ -96,7 +96,14 @@ esac dnl Add flags for gcc and g++ if test "x$GCC" = xyes; then - CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -std=c99 -ffast-math -fvisibility=hidden" + CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -std=c99 -ffast-math" + + # Enable -fvisibility=hidden if using a gcc that supports it + save_CFLAGS="$CFLAGS" + AC_MSG_CHECKING([whether $(CC) supports -fvisibility=hidden]) + CFLAGS="$CFLAGS -fvisibility=hidden" + AC_LINK_IFELSE([AC_LANG_PROGRAM()], AC_MSG_RESULT([yes]), + [CFLAGS="$save_CFLAGS" ; AC_MSG_RESULT([no])]); # Work around aliasing bugs - developers should comment this out CFLAGS="$CFLAGS -fno-strict-aliasing" @@ -580,9 +587,9 @@ dri) # Check for libdrm PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED]) PKG_CHECK_MODULES([DRI2PROTO], [dri2proto >= $DRI2PROTO_REQUIRED]) - GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED" - DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED" 
PKG_CHECK_MODULES([GLPROTO], [glproto >= $GLPROTO_REQUIRED]) + GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED glproto >= $GLPROTO_REQUIRED" + DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED" # find the DRI deps for libGL if test "$x11_pkgconfig" = yes; then @@ -1291,10 +1298,12 @@ AC_ARG_ENABLE([gallium-radeon], [AS_HELP_STRING([--enable-gallium-radeon], [build gallium radeon @<:@default=disabled@:>@])], [enable_gallium_radeon="$enableval"], - [enable_gallium_radeon=no]) + [enable_gallium_radeon=auto]) if test "x$enable_gallium_radeon" = xyes; then GALLIUM_WINSYS_DRM_DIRS="$GALLIUM_WINSYS_DRM_DIRS radeon" GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" +elif test "x$enable_gallium_radeon" = xauto; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" fi dnl diff --git a/include/GL/gl.h b/include/GL/gl.h index c163171dc76..36153b159b0 100644 --- a/include/GL/gl.h +++ b/include/GL/gl.h @@ -67,7 +67,8 @@ #elif defined(__CYGWIN__) && defined(USE_OPENGL32) /* use native windows opengl32 */ # define GLAPI extern # define GLAPIENTRY __stdcall -#elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +#elif (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \ + || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define GLAPI __attribute__((visibility("default"))) # define GLAPIENTRY #endif /* WIN32 && !CYGWIN */ diff --git a/progs/demos/engine.c b/progs/demos/engine.c index c54e3b8fb80..7e485111da7 100644 --- a/progs/demos/engine.c +++ b/progs/demos/engine.c @@ -120,7 +120,11 @@ static Engine Engines[NUM_ENGINES] = 0.3, /* CrankJournalRadius */ 0.4, /* CrankJournalLength */ 1.5, /* ConnectingRodLength */ - 0.1 /* ConnectingRodThickness */ + 0.1, /* ConnectingRodThickness */ + 0, /* CrankList */ + 0, /* ConnRodList */ + 0, /* PistonList */ + 0 /* BlockList */ }, { "Inline-4", @@ -136,7 +140,11 @@ static Engine Engines[NUM_ENGINES] = 0.3, /* CrankJournalRadius */ 0.4, /* CrankJournalLength */ 1.5, /* ConnectingRodLength 
*/ - 0.1 /* ConnectingRodThickness */ + 0.1, /* ConnectingRodThickness */ + 0, /* CrankList */ + 0, /* ConnRodList */ + 0, /* PistonList */ + 0 /* BlockList */ }, { "Boxer-6", @@ -152,7 +160,11 @@ static Engine Engines[NUM_ENGINES] = 0.3, /* CrankJournalRadius */ 0.4, /* CrankJournalLength */ 1.5, /* ConnectingRodLength */ - 0.1 /* ConnectingRodThickness */ + 0.1, /* ConnectingRodThickness */ + 0, /* CrankList */ + 0, /* ConnRodList */ + 0, /* PistonList */ + 0 /* BlockList */ } }; diff --git a/progs/demos/fbotexture.c b/progs/demos/fbotexture.c index 56482663dc4..46bf1c5f6a2 100644 --- a/progs/demos/fbotexture.c +++ b/progs/demos/fbotexture.c @@ -14,7 +14,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <math.h> #include "extfuncs.h" /* For debug */ diff --git a/progs/demos/fire.c b/progs/demos/fire.c index 9c351e80e51..bb912fb4473 100644 --- a/progs/demos/fire.c +++ b/progs/demos/fire.c @@ -726,8 +726,13 @@ main(int ac, char **av) maxage = 1.0 / dt; - if (ac == 2) + if (ac == 2) { np = atoi(av[1]); + if (np <= 0 || np > 1000000) { + fprintf(stderr, "Invalid input.\n"); + exit(-1); + } + } if (ac == 4) { WIDTH = atoi(av[2]); @@ -761,6 +766,7 @@ main(int ac, char **av) assert(np > 0); p = (part *) malloc(sizeof(part) * np); + assert(p); for (i = 0; i < np; i++) setnewpart(&p[i]); diff --git a/progs/demos/isosurf.c b/progs/demos/isosurf.c index dbe4d8d172a..a5b21ffb5c3 100644 --- a/progs/demos/isosurf.c +++ b/progs/demos/isosurf.c @@ -27,7 +27,6 @@ #include <stdio.h> #include <string.h> #include <stdlib.h> -#include <string.h> #include <math.h> #ifdef _WIN32 #include <windows.h> diff --git a/progs/demos/morph3d.c b/progs/demos/morph3d.c index 0f8ac426f33..07458eb156f 100644 --- a/progs/demos/morph3d.c +++ b/progs/demos/morph3d.c @@ -137,7 +137,6 @@ So the angle is: #endif #include <GL/glut.h> #include <math.h> -#include <string.h> #define Scale 0.3 diff --git a/progs/egl/xeglgears.c b/progs/egl/xeglgears.c index 614a6256037..9fdf4742441 
100644 --- a/progs/egl/xeglgears.c +++ b/progs/egl/xeglgears.c @@ -604,7 +604,7 @@ event_loop(struct egl_manager *eman, EGLint surface_type, EGLint w, EGLint h) break; case EGL_PBUFFER_BIT: eglWaitClient(); - if (!eglCopyBuffers(eman->xdpy, eman->pbuf, eman->xpix)) + if (!eglCopyBuffers(eman->dpy, eman->pbuf, eman->xpix)) break; /* fall through */ case EGL_PIXMAP_BIT: diff --git a/progs/glsl/convolutions.c b/progs/glsl/convolutions.c index 350e61bbdc5..fdfaf568a25 100644 --- a/progs/glsl/convolutions.c +++ b/progs/glsl/convolutions.c @@ -182,7 +182,7 @@ static void fillConvolution(GLint *k, static void setupConvolution() { GLint *kernel = (GLint*)malloc(sizeof(GLint) * 9); - GLfloat scale; + GLfloat scale = 0.0; GLfloat *vecKer = (GLfloat*)malloc(sizeof(GLfloat) * 9 * 4); GLuint loc; GLuint i; diff --git a/progs/glsl/shtest.c b/progs/glsl/shtest.c index 7b1917be1ce..520eccfb6d8 100644 --- a/progs/glsl/shtest.c +++ b/progs/glsl/shtest.c @@ -29,7 +29,6 @@ #include <assert.h> -#include <string.h> #include <stdio.h> #include <stdlib.h> #include <string.h> diff --git a/progs/redbook/aapoly.c b/progs/redbook/aapoly.c index b7b2b270902..64d06b7b3da 100644 --- a/progs/redbook/aapoly.c +++ b/progs/redbook/aapoly.c @@ -45,7 +45,6 @@ #include <GL/glut.h> #include <stdlib.h> #include <stdio.h> -#include <string.h> GLboolean polySmooth = GL_TRUE; diff --git a/progs/tests/getprocaddress.c b/progs/tests/getprocaddress.c index e699baf44bc..38ca7000df2 100644 --- a/progs/tests/getprocaddress.c +++ b/progs/tests/getprocaddress.c @@ -660,8 +660,8 @@ exercise_CompressedTextures(GLenum dimension) glGetTexLevelParameteriv(dimension, 0, GL_TEXTURE_COMPRESSED_IMAGE_SIZE_ARB, &queryCompressedSize); if (queryCompressedSize != sizeof(compressedTexture)) { - fprintf(stderr, "%s: compressed 3D texture changed size: expected %d, actual %d\n", - __FUNCTION__, sizeof(compressedTexture), queryCompressedSize); + fprintf(stderr, "%s: compressed 3D texture changed size: expected %lu, actual %d\n", + 
__FUNCTION__, (unsigned long) sizeof(compressedTexture), queryCompressedSize); return GL_FALSE; } (*GetCompressedTexImageARB)(dimension, 0, queryCompressedData); diff --git a/progs/tests/interleave.c b/progs/tests/interleave.c index 47bf9dfbe5e..acf67d02c12 100644 --- a/progs/tests/interleave.c +++ b/progs/tests/interleave.c @@ -105,7 +105,7 @@ static const unsigned indicies[12] = { 1, 4, 2 }; -#define NONE { NULL, 0, 0, 0 } +#define NONE { NULL, 0, 0, 0, sizeof( NULL ) } #define V2F { v, 2, 2 * sizeof( GLfloat ), GL_FLOAT, sizeof( v[0] ) } #define V3F { v, 3, 3 * sizeof( GLfloat ), GL_FLOAT, sizeof( v[0] ) } #define V4F { v, 4, 4 * sizeof( GLfloat ), GL_FLOAT, sizeof( v[0] ) } diff --git a/progs/tests/texwrap.c b/progs/tests/texwrap.c index 92c8a2f14c2..39c55919dd2 100644 --- a/progs/tests/texwrap.c +++ b/progs/tests/texwrap.c @@ -71,7 +71,7 @@ static struct wrap_mode modes[] = { WRAP_EXT ( GL_MIRROR_CLAMP_TO_EDGE_EXT, "GL_ATI_texture_mirror_once", "GL_EXT_texture_mirror_clamp", 999.0 ), - { 0 } + { 0, NULL, GL_FALSE, 0.0, { NULL, NULL } } }; static void diff --git a/progs/tests/vparray.c b/progs/tests/vparray.c index fe168c6cd56..75160afd463 100644 --- a/progs/tests/vparray.c +++ b/progs/tests/vparray.c @@ -8,7 +8,6 @@ #include <assert.h> #include <stdio.h> -#include <string.h> #include <stdlib.h> #include <string.h> #include <math.h> @@ -183,7 +182,7 @@ static void init_program(void) static const GLfloat bias[4] = {1.0, 1.0, 1.0, 0.0}; if (!glutExtensionSupported("GL_NV_vertex_program")) { - printf("Sorry, this program requires GL_NV_vertex_program"); + printf("Sorry, this program requires GL_NV_vertex_program\n"); exit(1); } diff --git a/progs/trivial/Makefile b/progs/trivial/Makefile index e15ec33ab59..5e08d603890 100644 --- a/progs/trivial/Makefile +++ b/progs/trivial/Makefile @@ -119,6 +119,7 @@ SOURCES = \ tri-lit-material.c \ tri-mask-tri.c \ tri-orig.c \ + tri-point-line-clipped.c \ tri-query.c \ tri-repeat.c \ tri-scissor-tri.c \ diff --git 
a/progs/trivial/SConscript b/progs/trivial/SConscript index 613383c77b1..e9ed1cb71e0 100644 --- a/progs/trivial/SConscript +++ b/progs/trivial/SConscript @@ -96,6 +96,7 @@ progs = [ 'tri-logicop-xor', 'tri-mask-tri', 'tri-orig', + 'tri-point-line-clipped', 'tri-query', 'tri-repeat', 'tri-scissor-tri', diff --git a/progs/trivial/tri-fbo-tex-mip.c b/progs/trivial/tri-fbo-tex-mip.c index 07443695011..df4725c7b4e 100644 --- a/progs/trivial/tri-fbo-tex-mip.c +++ b/progs/trivial/tri-fbo-tex-mip.c @@ -6,7 +6,6 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> #include <math.h> /* For debug */ diff --git a/progs/trivial/tri-fbo-tex.c b/progs/trivial/tri-fbo-tex.c index 8d1f871328d..eacb7d577be 100644 --- a/progs/trivial/tri-fbo-tex.c +++ b/progs/trivial/tri-fbo-tex.c @@ -6,8 +6,6 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> -#include <math.h> /* For debug */ diff --git a/progs/trivial/tri-point-line-clipped.c b/progs/trivial/tri-point-line-clipped.c new file mode 100644 index 00000000000..f8c1015f5f4 --- /dev/null +++ b/progs/trivial/tri-point-line-clipped.c @@ -0,0 +1,116 @@ +/** + * Test frustum/user clipping w/ glPolygonMode LINE/POINT. + * + * The bottom/left and bottom/right verts are outside the frustum and clipped. + * The top vertex is clipped by a user clipping plane. + * + * A filled gray reference triangle is shown underneath the points/lines. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <GL/glut.h> + + +static int win; + + +static void +ColorTri(void) +{ + glBegin(GL_TRIANGLES); + glColor3f(1, 0, 0); glVertex3f(-1.5, -0.8, 0.0); + glColor3f(0, 1, 0); glVertex3f( 1.5, -0.8, 0.0); + glColor3f(0, 0, 1); glVertex3f( 0.0, 0.9, 0.0); + glEnd(); +} + + +static void +GrayTri(void) +{ + glColor3f(0.3, 0.3, 0.3); + glBegin(GL_TRIANGLES); + glVertex3f(-1.5, -0.8, 0.0); + glVertex3f( 1.5, -0.8, 0.0); + glVertex3f( 0.0, 0.9, 0.0); + glEnd(); +} + + +static void +Draw(void) +{ + static const GLdouble plane[4] = { 0, -1.0, 0, 0.5 }; + + glClear(GL_COLOR_BUFFER_BIT); + + glPointSize(13.0); + glLineWidth(5.0); + + glClipPlane(GL_CLIP_PLANE0, plane); + glEnable(GL_CLIP_PLANE0); + + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + GrayTri(); + + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); + ColorTri(); + + glPolygonMode(GL_FRONT_AND_BACK, GL_POINT); + ColorTri(); + + glutSwapBuffers(); +} + + +static void Reshape(int width, int height) +{ + glViewport(0, 0, (GLint)width, (GLint)height); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0); + glMatrixMode(GL_MODELVIEW); +} + + +static void +Key(unsigned char key, int x, int y) +{ + if (key == 27) { + glutDestroyWindow(win); + exit(0); + } + else { + glutPostRedisplay(); + } +} + + +static void +Init(void) +{ + fprintf(stderr, "GL_RENDERER = %s\n", (char *) glGetString(GL_RENDERER)); + fprintf(stderr, "GL_VERSION = %s\n", (char *) glGetString(GL_VERSION)); + fprintf(stderr, "GL_VENDOR = %s\n", (char *) glGetString(GL_VENDOR)); + fflush(stderr); +} + + +int +main(int argc, char **argv) +{ + glutInitWindowSize(300, 300); + glutInit(&argc, argv); + glutInitDisplayMode(GLUT_DOUBLE | GLUT_DEPTH); + win = glutCreateWindow(*argv); + if (!win) { + return 1; + } + Init(); + glutReshapeFunc(Reshape); + glutKeyboardFunc(Key); + glutDisplayFunc(Draw); + glutMainLoop(); + return 0; +} diff --git a/progs/xdemos/corender.c 
b/progs/xdemos/corender.c index 640c902c136..e706f4b3da5 100644 --- a/progs/xdemos/corender.c +++ b/progs/xdemos/corender.c @@ -20,7 +20,6 @@ #include <math.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> #include <X11/keysym.h> #include <unistd.h> #include "ipc.h" diff --git a/progs/xdemos/manywin.c b/progs/xdemos/manywin.c index 3b0810b2e54..8ad5c4fe49f 100644 --- a/progs/xdemos/manywin.c +++ b/progs/xdemos/manywin.c @@ -400,6 +400,8 @@ main(int argc, char *argv[]) } if (n < 1) n = 1; + if (n > MAX_HEADS) + n = MAX_HEADS; printf("%d windows\n", n); for (i = 0; i < n; i++) { diff --git a/src/egl/drivers/glx/egl_glx.c b/src/egl/drivers/glx/egl_glx.c index 96292b0e9e8..7c6e8637a1c 100644 --- a/src/egl/drivers/glx/egl_glx.c +++ b/src/egl/drivers/glx/egl_glx.c @@ -899,7 +899,7 @@ GLX_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) * Called from eglGetProcAddress() via drv->API.GetProcAddress(). */ static _EGLProc -GLX_eglGetProcAddress(const char *procname) +GLX_eglGetProcAddress(_EGLDriver *drv, const char *procname) { return (_EGLProc) glXGetProcAddress((const GLubyte *) procname); } diff --git a/src/egl/drivers/xdri/egl_xdri.c b/src/egl/drivers/xdri/egl_xdri.c index d2affc66dd4..8425b3d11ef 100644 --- a/src/egl/drivers/xdri/egl_xdri.c +++ b/src/egl/drivers/xdri/egl_xdri.c @@ -355,7 +355,7 @@ xdri_eglTerminate(_EGLDriver *drv, _EGLDisplay *dpy) * Called from eglGetProcAddress() via drv->API.GetProcAddress(). 
*/ static _EGLProc -xdri_eglGetProcAddress(const char *procname) +xdri_eglGetProcAddress(_EGLDriver *drv, const char *procname) { /* the symbol is defined in libGL.so */ return (_EGLProc) _glapi_get_proc_address(procname); @@ -562,7 +562,7 @@ xdri_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw) struct xdri_egl_display *xdri_dpy = lookup_display(dpy); struct xdri_egl_surface *xdri_surf = lookup_surface(draw); - xdri_dpy->psc->driScreen->swapBuffers(xdri_surf->driDrawable); + xdri_dpy->psc->driScreen->swapBuffers(xdri_surf->driDrawable, 0, 0, 0); return EGL_TRUE; } diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 14cc5fa6137..26e0602453a 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -716,7 +716,8 @@ void (* EGLAPIENTRY eglGetProcAddress(const char *procname))() /* now loop over drivers to query their procs */ for (i = 0; i < _eglGlobal.NumDrivers; i++) { - _EGLProc p = _eglGlobal.Drivers[i]->API.GetProcAddress(procname); + _EGLDriver *drv = _eglGlobal.Drivers[i]; + _EGLProc p = drv->API.GetProcAddress(drv, procname); if (p) return p; } diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index aa0abe3eb6b..080f2155e3a 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -44,7 +44,7 @@ typedef const char *(*QueryString_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLint n typedef EGLBoolean (*WaitClient_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx); typedef EGLBoolean (*WaitNative_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine); -typedef _EGLProc (*GetProcAddress_t)(const char *procname); +typedef _EGLProc (*GetProcAddress_t)(_EGLDriver *drv, const char *procname); diff --git a/src/egl/main/eglcompiler.h b/src/egl/main/eglcompiler.h index f7c93f14ce2..5a3fb49ac22 100644 --- a/src/egl/main/eglcompiler.h +++ b/src/egl/main/eglcompiler.h @@ -64,7 +64,8 @@ /** * Function visibility */ -#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +#if (defined(__GNUC__) && (__GNUC__ * 
100 + __GNUC_MINOR__) >= 303) \ + || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PUBLIC __attribute__((visibility("default"))) #else # define PUBLIC diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index e3af41c6e04..8f937e3b4e9 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -111,6 +111,7 @@ C_SOURCES = \ util/u_math.c \ util/u_mm.c \ util/u_rect.c \ + util/u_ringbuffer.c \ util/u_simple_shaders.c \ util/u_snprintf.c \ util/u_stream_stdc.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 782eb533863..f957090b5fb 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -147,6 +147,7 @@ source = [ 'util/u_math.c', 'util/u_mm.c', 'util/u_rect.c', + 'util/u_ringbuffer.c', 'util/u_simple_shaders.c', 'util/u_snprintf.c', 'util/u_stream_stdc.c', diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 2b16332e143..fdfb5faa59e 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -539,6 +539,38 @@ void cso_restore_samplers(struct cso_context *ctx) cso_single_sampler_done( ctx ); } +/* + * If the function encounters any errors it will return the + * last one. Done to always try to set as many samplers + * as possible. 
+ */ +enum pipe_error cso_set_vertex_samplers(struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + unsigned i; + enum pipe_error temp, error = PIPE_OK; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) { + temp = cso_single_vertex_sampler( ctx, i, templates[i] ); + if (temp != PIPE_OK) + error = temp; + } + + for ( ; i < ctx->nr_samplers; i++) { + temp = cso_single_vertex_sampler( ctx, i, NULL ); + if (temp != PIPE_OK) + error = temp; + } + + cso_single_vertex_sampler_done( ctx ); + + return error; +} + void cso_save_vertex_samplers(struct cso_context *ctx) { diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index b9e313e32d6..d2089b1c883 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -84,6 +84,10 @@ enum pipe_error cso_single_sampler( struct cso_context *cso, void cso_single_sampler_done( struct cso_context *cso ); +enum pipe_error cso_set_vertex_samplers(struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states); + void cso_save_vertex_samplers(struct cso_context *cso); diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 667aa46b208..e90dfc5aec4 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -95,6 +95,7 @@ void draw_destroy( struct draw_context *draw ) draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); draw_vs_destroy( draw ); + draw_gs_destroy( draw ); FREE( draw ); } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index b716209df29..8a64c06efcd 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -164,6 +164,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, 
unsigned count); +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void draw_flush(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 5db2e755423..daf8d071f12 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -59,6 +59,15 @@ draw_gs_init( struct draw_context *draw ) return TRUE; } +void draw_gs_destroy( struct draw_context *draw ) +{ + if (!draw->gs.machine) + return; + + align_free(draw->gs.machine->Primitives); + + tgsi_exec_machine_destroy(draw->gs.machine); +} void draw_gs_set_constants( struct draw_context *draw, const float (*constants)[4], diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 205cda5eabe..51a6115ebf5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -55,7 +55,7 @@ -struct clipper { +struct clip_stage { struct draw_stage stage; /**< base class */ /* Basically duplicate some of the flatshading logic here: @@ -70,9 +70,9 @@ struct clipper { /* This is a bit confusing: */ -static INLINE struct clipper *clipper_stage( struct draw_stage *stage ) +static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) { - return (struct clipper *)stage; + return (struct clip_stage *)stage; } @@ -92,11 +92,12 @@ static void interp_attr( float *fdst, fdst[3] = LINTERP( t, fout[3], fin[3] ); } + static void copy_colors( struct draw_stage *stage, struct vertex_header *dst, const struct vertex_header *src ) { - const struct clipper *clipper = clipper_stage(stage); + const struct clip_stage *clipper = clip_stage(stage); uint i; for (i = 0; i < clipper->num_color_attribs; i++) { const uint attr = clipper->color_attribs[i]; @@ -108,7 +109,7 @@ static void copy_colors( struct draw_stage *stage, /* Interpolate between 
two vertices to produce a third. */ -static void interp( const struct clipper *clip, +static void interp( const struct clip_stage *clip, struct vertex_header *dst, float t, const struct vertex_header *out, @@ -179,7 +180,7 @@ static void emit_poly( struct draw_stage *stage, header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ if (i == n-1) - header.flags |= edge_last; + header.flags |= edge_last; if (0) { const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; @@ -200,13 +201,14 @@ static void emit_poly( struct draw_stage *stage, } } + static INLINE float dot4(const float *a, const float *b) { - return (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); + return (a[0] * b[0] + + a[1] * b[1] + + a[2] * b[2] + + a[3] * b[3]); } @@ -217,7 +219,7 @@ do_clip_tri( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *a[MAX_CLIPPED_VERTICES]; struct vertex_header *b[MAX_CLIPPED_VERTICES]; struct vertex_header **inlist = a; @@ -280,6 +282,7 @@ do_clip_tri( struct draw_stage *stage, dp_prev = dp; } + /* swap in/out lists */ { struct vertex_header **tmp = inlist; inlist = outlist; @@ -291,15 +294,11 @@ do_clip_tri( struct draw_stage *stage, /* If flat-shading, copy color to new provoking vertex. 
*/ if (clipper->flat && inlist[0] != header->v[2]) { - if (1) { - inlist[0] = dup_vert(stage, inlist[0], tmpnr++); - } + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); copy_colors(stage, inlist[0], header->v[2]); } - - /* Emit the polygon as triangles to the setup stage: */ if (n >= 3) @@ -314,7 +313,7 @@ do_clip_line( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - const struct clipper *clipper = clipper_stage( stage ); + const struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; const float *pos0 = v0->clip; @@ -416,13 +415,14 @@ clip_tri( struct draw_stage *stage, } } + /* Update state. Could further delay this until we hit the first * primitive that really requires clipping. */ static void clip_init_state( struct draw_stage *stage ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); clipper->flat = stage->draw->rasterizer->flatshade ? 
TRUE : FALSE; @@ -488,7 +488,7 @@ static void clip_destroy( struct draw_stage *stage ) */ struct draw_stage *draw_clip_stage( struct draw_context *draw ) { - struct clipper *clipper = CALLOC_STRUCT(clipper); + struct clip_stage *clipper = CALLOC_STRUCT(clip_stage); if (clipper == NULL) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 1a5269c0de9..d40c0352401 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf, /* Note: we really do want data[0] here, not data[pos]: */ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); - vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr); if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); @@ -271,10 +271,12 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index e49041556bd..ef49e575366 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -172,6 +172,8 @@ struct draw_context boolean force_passthrough; /**< never clip or shade */ + boolean dump_vs; + double mrd; /**< minimum resolvable depth value, for polygon offset */ /* pipe state that we need: */ @@ -239,6 +241,8 @@ struct draw_context unsigned reduced_prim; + unsigned instance_id; + void *driver_private; }; @@ -265,6 +269,7 @@ boolean draw_gs_init( struct 
draw_context *draw ); void draw_gs_set_constants( struct draw_context *, const float (*constants)[4], unsigned size ); +void draw_gs_destroy( struct draw_context *draw ); /******************************************************************************* * Common shading code: diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 2801dbafe47..a5ddec52863 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -280,20 +280,33 @@ void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - unsigned reduced_prim = u_reduced_prim(prim); + draw_arrays_instanced(draw, prim, start, count, 0, 1); +} + +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + unsigned reduced_prim = u_reduced_prim(mode); + unsigned instance; + if (reduced_prim != draw->reduced_prim) { - draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); draw->reduced_prim = reduced_prim; } if (0) - draw_print_arrays(draw, prim, start, MIN2(count, 20)); + draw_print_arrays(draw, mode, start, MIN2(count, 20)); #if 0 { int i; - debug_printf("draw_arrays(prim=%u start=%u count=%u):\n", - prim, start, count); + debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", + mode, start, count); tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { @@ -311,6 +324,8 @@ draw_arrays(struct draw_context *draw, unsigned prim, } #endif - /* drawing done here: */ - draw_pt_arrays(draw, prim, start, count); + for (instance = 0; instance < instanceCount; instance++) { + draw->instance_id = instance + startInstance; + draw_pt_arrays(draw, mode, start, count); + } } diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 20edf7a227e..d5e0d92a605 100644 
--- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); struct pt_fetch; void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vertex_input_count, - unsigned vertex_size ); + unsigned vertex_size, + unsigned instance_id_index ); void draw_pt_fetch_run( struct pt_fetch *fetch, const unsigned *elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 064e16c295c..4fb53276bbe 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -121,10 +121,12 @@ void draw_pt_emit_prepare( struct pt_emit *emit, emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; @@ -204,6 +206,7 @@ void draw_pt_emit( struct pt_emit *emit, translate->run( translate, 0, vertex_count, + draw->instance_id, hw_verts ); render->unmap_vertices( render, @@ -263,6 +266,7 @@ void draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, count, + draw->instance_id, hw_verts); if (0) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 305bfef4352..55e7a7b81ad 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -58,12 +58,14 @@ struct pt_fetch { */ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vs_input_count, - unsigned vertex_size ) + unsigned vertex_size, + unsigned instance_id_index ) { struct draw_context *draw = fetch->draw; unsigned nr_inputs; - unsigned i, nr = 0; + unsigned i, nr = 0, ei = 0; unsigned dst_offset = 0; + unsigned 
num_extra_inputs = 0; struct translate_key key; fetch->vertex_size = vertex_size; @@ -78,9 +80,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, { /* Need to set header->vertex_id = 0xffff somehow. */ + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; + key.element[nr].instance_divisor = 0; key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].output_offset = dst_offset; dst_offset += 1 * sizeof(float); @@ -91,19 +95,36 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, */ dst_offset += 4 * sizeof(float); } - - assert( draw->pt.nr_vertex_elements >= vs_input_count ); - nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements ); + if (instance_id_index != ~0) { + num_extra_inputs++; + } + + assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count); + + nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs); for (i = 0; i < nr_inputs; i++) { - key.element[nr].input_format = draw->pt.vertex_element[i].src_format; - key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; - key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; - key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - key.element[nr].output_offset = dst_offset; + if (i == instance_id_index) { + key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID; + key.element[nr].input_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_offset = dst_offset; + + dst_offset += sizeof(uint); + } else { + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; + key.element[nr].input_format = draw->pt.vertex_element[ei].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset; + 
key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor; + key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[nr].output_offset = dst_offset; + + ei++; + dst_offset += 4 * sizeof(float); + } - dst_offset += 4 * sizeof(float); nr++; } @@ -158,6 +179,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, translate->run_elts( translate, elts, count, + draw->instance_id, verts ); } @@ -183,6 +205,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, translate->run( translate, start, count, + draw->instance_id, verts ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e7fe6b3b768..2a604470e9a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -166,9 +166,11 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, continue; } + key.element[i].type = TRANSLATE_ELEMENT_NORMAL; key.element[i].input_format = input_format; key.element[i].input_buffer = input_buffer; key.element[i].input_offset = input_offset; + key.element[i].instance_divisor = src->instance_divisor; key.element[i].output_format = output_format; key.element[i].output_offset = dst_offset; @@ -256,6 +258,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, feme->translate->run_elts( feme->translate, fetch_elts, fetch_count, + draw->instance_id, hw_verts ); if (0) { @@ -314,6 +317,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); if (0) { @@ -374,6 +378,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 1a9df4cac5d..23da556f797 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -59,6 +59,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vs.vertex_shader; + unsigned i; + unsigned instance_id_index = ~0; /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. @@ -66,6 +68,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned nr = MAX2( vs->info.num_inputs, vs->info.num_outputs + 1 ); + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + fpme->prim = prim; fpme->opt = opt; @@ -79,7 +90,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, draw_pt_fetch_prepare( fpme->fetch, vs->info.num_inputs, - fpme->vertex_size ); + fpme->vertex_size, + instance_id_index ); /* XXX: it's not really gl rasterization rules we care about here, * but gl vs dx9 clip spaces. 
*/ diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 35536895326..e03ac8c2291 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -43,11 +43,11 @@ #include "translate/translate.h" #include "translate/translate_cache.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" - void draw_vs_set_constants( struct draw_context *draw, const float (*constants)[4], unsigned size ) @@ -83,6 +83,10 @@ draw_create_vertex_shader(struct draw_context *draw, { struct draw_vertex_shader *vs; + if (draw->dump_vs) { + tgsi_dump(shader->tokens, 0); + } + vs = draw_create_vs_llvm( draw, shader ); if (!vs) { vs = draw_create_vs_sse( draw, shader ); @@ -152,6 +156,8 @@ draw_delete_vertex_shader(struct draw_context *draw, boolean draw_vs_init( struct draw_context *draw ) { + draw->dump_vs = debug_get_bool_option("GALLIUM_DUMP_VS", FALSE); + draw->vs.machine = tgsi_exec_machine_create(); if (!draw->vs.machine) return FALSE; diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index e3b807ebd0e..00036cfe68b 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -43,6 +43,7 @@ struct draw_varient_input enum pipe_format format; unsigned buffer; unsigned offset; + unsigned instance_divisor; }; struct draw_varient_output diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index ad184bd696d..da9f3e3d35c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* loop over verts */ for (i = 0; i < count; i += MAX_VERTICES) { const uint max_vertices = MIN2(MAX_VERTICES, count - i); - float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; - float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; - float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + 
PIPE_ALIGN_VAR(16) float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]; + PIPE_ALIGN_VAR(16) float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]; + PIPE_ALIGN_VAR(16) float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]; uint attr; /* convert (up to) four input verts to SoA format */ diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index d16692584e5..9f40030f39f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -142,6 +142,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->fetch->run_elts( vsvg->fetch, elts, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, @@ -181,6 +182,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -203,6 +205,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->fetch->run( vsvg->fetch, start, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, @@ -239,6 +242,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -281,9 +285,11 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, fetch.nr_elements = key->nr_inputs; fetch.output_stride = vsvg->temp_vertex_stride; for (i = 0; i < key->nr_inputs; i++) { + fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL; fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; + fetch.element[i].instance_divisor = 0; fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; fetch.element[i].output_offset = i * 4 * sizeof(float); assert(fetch.element[i].output_offset 
< fetch.output_stride); @@ -295,17 +301,21 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, for (i = 0; i < key->nr_outputs; i++) { if (key->element[i].out.format != EMIT_1F_PSIZE) { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); + emit.element[i].instance_divisor = 0; emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); emit.element[i].output_offset = key->element[i].out.offset; assert(emit.element[i].input_offset <= fetch.output_stride); } else { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].input_buffer = 1; emit.element[i].input_offset = 0; + emit.element[i].instance_divisor = 0; emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].output_offset = key->element[i].out.offset; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index ba6f7b15f9e..a4b78f14943 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -80,27 +80,11 @@ struct fenced_buffer_list */ struct fenced_buffer { - /* - * Immutable members. - */ - struct pb_buffer base; + struct pb_buffer *buffer; - struct fenced_buffer_list *list; - - /** - * Protected by fenced_buffer_list::mutex - */ - struct list_head head; - /** - * Following members are mutable and protected by this mutex. - * - * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex - * held, but in order to prevent deadlocks you must never lock - * fenced_buffer_list::mutex with this mutex held. 
- */ - pipe_mutex mutex; + /* FIXME: protect access with mutex */ /** * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current @@ -112,6 +96,9 @@ struct fenced_buffer struct pb_validate *vl; unsigned validation_flags; struct pipe_fence_handle *fence; + + struct list_head head; + struct fenced_buffer_list *list; }; @@ -123,24 +110,15 @@ fenced_buffer(struct pb_buffer *buf) } -/** - * Add the buffer to the fenced list. - * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order, before calling this function. - * - * Reference count should be incremented before calling this function. - */ static INLINE void -fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +_fenced_buffer_add(struct fenced_buffer *fenced_buf) { + struct fenced_buffer_list *fenced_list = fenced_buf->list; + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); - /* TODO: Move the reference count increment here */ - #ifdef DEBUG LIST_DEL(&fenced_buf->head); assert(fenced_list->numUnfenced); @@ -152,16 +130,32 @@ fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, /** - * Remove the buffer from the fenced list. - * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order before calling this function. - * - * Reference count should be decremented after calling this function. + * Actually destroy the buffer. 
*/ static INLINE void -fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +_fenced_buffer_destroy(struct fenced_buffer *fenced_buf) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + + assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(!fenced_buf->fence); +#ifdef DEBUG + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); + assert(fenced_list->numUnfenced); + --fenced_list->numUnfenced; +#else + (void)fenced_list; +#endif + pb_reference(&fenced_buf->buffer, NULL); + FREE(fenced_buf); +} + + +static INLINE void +_fenced_buffer_remove(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { struct pb_fence_ops *ops = fenced_list->ops; @@ -183,56 +177,37 @@ fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, ++fenced_list->numUnfenced; #endif - /* TODO: Move the reference count decrement and destruction here */ + /** + * FIXME!!! + */ + + if(!pipe_is_referenced(&fenced_buf->base.base.reference)) + _fenced_buffer_destroy(fenced_buf); } -/** - * Wait for the fence to expire, and remove it from the fenced list. - * - * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be - * held -- it will be acquired internally. - */ static INLINE enum pipe_error -fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +_fenced_buffer_finish(struct fenced_buffer *fenced_buf) { + struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; - enum pipe_error ret = PIPE_ERROR; #if 0 debug_warning("waiting for GPU"); #endif - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); - - /* - * Acquire the global lock. Must release buffer mutex first to preserve - * lock order. 
- */ - pipe_mutex_unlock(fenced_buf->mutex); - pipe_mutex_lock(fenced_list->mutex); - pipe_mutex_lock(fenced_buf->mutex); - if(fenced_buf->fence) { - if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) { - /* Remove from the fenced list */ - /* TODO: remove consequents */ - fenced_buffer_remove_locked(fenced_list, fenced_buf); - - p_atomic_dec(&fenced_buf->base.base.reference.count); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - - ret = PIPE_OK; + if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) { + return PIPE_ERROR; } + /* Remove from the fenced list */ + /* TODO: remove consequents */ + _fenced_buffer_remove(fenced_list, fenced_buf); } - pipe_mutex_unlock(fenced_list->mutex); - - return ret; + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + return PIPE_OK; } @@ -240,8 +215,8 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, * Free as many fenced buffers from the list head as possible. 
*/ static void -fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, - int wait) +_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, + int wait) { struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; @@ -254,29 +229,21 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, while(curr != &fenced_list->delayed) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); - if(fenced_buf->fence != prev_fence) { int signaled; if (wait) signaled = ops->fence_finish(ops, fenced_buf->fence, 0); else signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - if (signaled != 0) { - pipe_mutex_unlock(fenced_buf->mutex); + if (signaled != 0) break; - } prev_fence = fenced_buf->fence; } else { assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } - fenced_buffer_remove_locked(fenced_list, fenced_buf); - pipe_mutex_unlock(fenced_buf->mutex); - - pb_buf = &fenced_buf->base; - pb_reference(&pb_buf, NULL); + _fenced_buffer_remove(fenced_list, fenced_buf); curr = next; next = curr->next; @@ -290,25 +257,30 @@ fenced_buffer_destroy(struct pb_buffer *buf) struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; - assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - assert(!fenced_buf->fence); - -#ifdef DEBUG pipe_mutex_lock(fenced_list->mutex); - assert(fenced_buf->head.prev); - assert(fenced_buf->head.next); - LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; + assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + if (fenced_buf->fence) { + struct pb_fence_ops *ops = fenced_list->ops; + if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { + struct list_head *curr, *prev; + curr = &fenced_buf->head; + prev = curr->prev; + do { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + 
assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); + _fenced_buffer_remove(fenced_list, fenced_buf); + curr = prev; + prev = curr->prev; + } while (curr != &fenced_list->delayed); + } + else { + /* delay destruction */ + } + } + else { + _fenced_buffer_destroy(fenced_buf); + } pipe_mutex_unlock(fenced_list->mutex); -#else - (void)fenced_list; -#endif - - pb_reference(&fenced_buf->buffer, NULL); - - pipe_mutex_destroy(fenced_buf->mutex); - FREE(fenced_buf); } @@ -319,23 +291,24 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; - void *map = NULL; - - pipe_mutex_lock(fenced_buf->mutex); + void *map; assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); /* Serialize writes */ if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { - if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && - ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { + if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) { /* Don't wait for the GPU to finish writing */ - goto done; + if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) + _fenced_buffer_remove(fenced_list, fenced_buf); + else + return NULL; + } + else { + /* Wait for the GPU to finish writing */ + _fenced_buffer_finish(fenced_buf); } - - /* Wait for the GPU to finish writing */ - fenced_buffer_finish_locked(fenced_list, fenced_buf); } #if 0 @@ -352,9 +325,6 @@ fenced_buffer_map(struct pb_buffer *buf, fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; } -done: - pipe_mutex_unlock(fenced_buf->mutex); - return map; } @@ -363,9 +333,6 @@ static void fenced_buffer_unmap(struct pb_buffer *buf) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - - pipe_mutex_lock(fenced_buf->mutex); - assert(fenced_buf->mapcount); if(fenced_buf->mapcount) { pb_unmap(fenced_buf->buffer); @@ -373,8 
+340,6 @@ fenced_buffer_unmap(struct pb_buffer *buf) if(!fenced_buf->mapcount) fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } - - pipe_mutex_unlock(fenced_buf->mutex); } @@ -386,14 +351,11 @@ fenced_buffer_validate(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); enum pipe_error ret; - pipe_mutex_lock(fenced_buf->mutex); - if(!vl) { /* invalidate */ fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; - ret = PIPE_OK; - goto done; + return PIPE_OK; } assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); @@ -401,17 +363,14 @@ fenced_buffer_validate(struct pb_buffer *buf, flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; /* Buffer cannot be validated in two different lists */ - if(fenced_buf->vl && fenced_buf->vl != vl) { - ret = PIPE_ERROR_RETRY; - goto done; - } + if(fenced_buf->vl && fenced_buf->vl != vl) + return PIPE_ERROR_RETRY; #if 0 /* Do not validate if buffer is still mapped */ if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { /* TODO: wait for the thread that mapped the buffer to unmap it */ - ret = PIPE_ERROR_RETRY; - goto done; + return PIPE_ERROR_RETRY; } /* Final sanity checking */ assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); @@ -421,21 +380,17 @@ fenced_buffer_validate(struct pb_buffer *buf, if(fenced_buf->vl == vl && (fenced_buf->validation_flags & flags) == flags) { /* Nothing to do -- buffer already validated */ - ret = PIPE_OK; - goto done; + return PIPE_OK; } ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) - goto done; + return ret; fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; -done: - pipe_mutex_unlock(fenced_buf->mutex); - - return ret; + return PIPE_OK; } @@ -450,36 +405,29 @@ fenced_buffer_fence(struct pb_buffer *buf, fenced_buf = fenced_buffer(buf); fenced_list = fenced_buf->list; ops = fenced_list->ops; - - pipe_mutex_lock(fenced_list->mutex); - pipe_mutex_lock(fenced_buf->mutex); - - 
assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - - if(fence != fenced_buf->fence) { - assert(fenced_buf->vl); - assert(fenced_buf->validation_flags); - - if (fenced_buf->fence) { - fenced_buffer_remove_locked(fenced_list, fenced_buf); - p_atomic_dec(&fenced_buf->base.base.reference.count); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - } - if (fence) { - ops->fence_reference(ops, &fenced_buf->fence, fence); - fenced_buf->flags |= fenced_buf->validation_flags; - p_atomic_inc(&fenced_buf->base.base.reference.count); - fenced_buffer_add_locked(fenced_list, fenced_buf); - } - - pb_fence(fenced_buf->buffer, fence); - fenced_buf->vl = NULL; - fenced_buf->validation_flags = 0; + if(fence == fenced_buf->fence) { + /* Nothing to do */ + return; } - pipe_mutex_unlock(fenced_buf->mutex); + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + pipe_mutex_lock(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_list, fenced_buf); + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + _fenced_buffer_add(fenced_buf); + } pipe_mutex_unlock(fenced_list->mutex); + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; } @@ -489,7 +437,6 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, pb_size *offset) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - /* NOTE: accesses immutable members only -- mutex not necessary */ pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); } @@ -529,8 +476,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, buf->buffer = buffer; buf->list = fenced_list; - pipe_mutex_init(buf->mutex); - #ifdef DEBUG pipe_mutex_lock(fenced_list->mutex); LIST_ADDTAIL(&buf->head, &fenced_list->unfenced); @@ -572,7 +517,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) { pipe_mutex_lock(fenced_list->mutex); - 
fenced_buffer_list_check_free_locked(fenced_list, wait); + _fenced_buffer_list_check_free(fenced_list, wait); pipe_mutex_unlock(fenced_list->mutex); } @@ -594,13 +539,11 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) next = curr->next; while(curr != &fenced_list->unfenced) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); assert(!fenced_buf->fence); debug_printf("%10p %7u %7u\n", (void *) fenced_buf, fenced_buf->base.base.size, p_atomic_read(&fenced_buf->base.base.reference.count)); - pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -610,7 +553,6 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) while(curr != &fenced_list->delayed) { int signaled; fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); debug_printf("%10p %7u %7u %10p %s\n", (void *) fenced_buf, @@ -618,7 +560,6 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) p_atomic_read(&fenced_buf->base.base.reference.count), (void *) fenced_buf->fence, signaled == 0 ? 
"y" : "n"); - pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -639,8 +580,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) sched_yield(); #endif + _fenced_buffer_list_check_free(fenced_list, 1); pipe_mutex_lock(fenced_list->mutex); - fenced_buffer_list_check_free_locked(fenced_list, 1); } #ifdef DEBUG @@ -648,7 +589,6 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) #endif pipe_mutex_unlock(fenced_list->mutex); - pipe_mutex_destroy(fenced_list->mutex); fenced_list->ops->destroy(fenced_list->ops); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 1acf3c373eb..f675427d987 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -673,6 +673,13 @@ void x86_and( struct x86_function *p, emit_op_modrm( p, 0x23, 0x21, dst, src ); } +void x86_div( struct x86_function *p, + struct x86_reg src ) +{ + assert(src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); +} + /*********************************************************************** diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 731a6517968..f7612d416a0 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -244,6 +244,7 @@ void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_sahf( struct x86_function *p ); +void x86_div( struct x86_function *p, struct x86_reg src ); void x86_cdecl_caller_push_regs( struct x86_function *p ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c index e2e5394f86f..c254a7274f1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -123,7 +123,8 @@ static const char *semantic_names[] = "NORMAL", "FACE", "EDGEFLAG", - "PRIM_ID" + "PRIM_ID", + "INSTANCEID" }; static const char *immediate_type_names[] = @@ -218,8 +219,13 @@ _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { + ENM(src->Register.File, file_names); + if (src->Register.Dimension) { + CHR('['); + SID(src->Dimension.Index); + CHR(']'); + } if (src->Register.Indirect) { - ENM( src->Register.File, file_names ); CHR( '[' ); ENM( src->Indirect.File, file_names ); CHR( '[' ); @@ -233,16 +239,10 @@ _dump_register_src( } CHR( ']' ); } else { - ENM( src->Register.File, file_names ); CHR( '[' ); SID( src->Register.Index ); CHR( ']' ); } - if (src->Register.Dimension) { - CHR( '[' ); - SID( src->Dimension.Index ); - CHR( ']' ); - } } static void diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 2bcb33392a8..83646b73c1e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1041,11 +1041,19 @@ fetch_src_file_channel( default: assert( 0 ); + chan->u[0] = 0; + chan->u[1] = 0; + chan->u[2] = 0; + chan->u[3] = 0; } break; default: assert( 0 ); + chan->u[0] = 0; + chan->u[1] = 0; + chan->u[2] = 0; + chan->u[3] = 0; } } @@ -1121,11 +1129,14 @@ fetch_source(const struct tgsi_exec_machine *mach, * subscript to a register file. Effectively it means that * the register file is actually a 2D array of registers. * - * file[1][3] == file[1*sizeof(file[1])+3], + * file[3][1] == file[3*sizeof(file[1])+1], * where: * [3] = Dimension.Index */ if (reg->Register.Dimension) { + int array_size; + union tgsi_exec_channel dim_index; + /* The size of the first-order array depends on the register file type. 
* We need to multiply the index to the first array to get an effective, * "flat" index that points to the beginning of the second-order array. @@ -1133,32 +1144,27 @@ fetch_source(const struct tgsi_exec_machine *mach, switch (reg->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_SYSTEM_VALUE: - index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + array_size = TGSI_EXEC_MAX_INPUT_ATTRIBS; break; case TGSI_FILE_CONSTANT: - index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; + array_size = TGSI_EXEC_MAX_CONST_BUFFER; break; default: assert( 0 ); + array_size = 0; } - index.i[0] += reg->Dimension.Index; - index.i[1] += reg->Dimension.Index; - index.i[2] += reg->Dimension.Index; - index.i[3] += reg->Dimension.Index; + dim_index.i[0] = + dim_index.i[1] = + dim_index.i[2] = + dim_index.i[3] = reg->Dimension.Index; /* Again, the second subscript index can be addressed indirectly * identically to the first one. * Nothing stops us from indirectly addressing the indirect register, * but there is no need for that, so we won't exercise it. * - * file[1][ind[4].y+3], + * file[ind[4].y+3][1], * where: * ind = DimIndirect.File * [4] = DimIndirect.Index @@ -1183,20 +1189,25 @@ fetch_source(const struct tgsi_exec_machine *mach, &index2, &indir_index ); - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; + dim_index.i[0] += indir_index.i[0]; + dim_index.i[1] += indir_index.i[1]; + dim_index.i[2] += indir_index.i[2]; + dim_index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. 
*/ for (i = 0; i < QUAD_SIZE; i++) { if ((execmask & (1 << i)) == 0) - index.i[i] = 0; + dim_index.i[i] = 0; } } + index.i[0] += dim_index.i[0] * array_size; + index.i[1] += dim_index.i[1] * array_size; + index.i[2] += dim_index.i[2] * array_size; + index.i[3] += dim_index.i[3] * array_size; + /* If by any chance there was a need for a 3D array of register * files, we would have to check whether Dimension is followed * by a dimension register and continue the saga. diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 138d2d095bb..ad553c71a57 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -51,7 +51,8 @@ * Since it's pretty much impossible to form PPC vector immediates, load * them from memory here: */ -const float ppc_builtin_constants[] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) const float +ppc_builtin_constants[] = { 1.0f, -128.0f, 128.0, 0.0 }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 7f1c8e5dd68..431c3ffb142 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -335,13 +335,9 @@ iter_instruction( fill_scan_register1d(ind_reg, inst->Src[i].Indirect.File, inst->Src[i].Indirect.Index); - if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || - reg->indices[0] != 0) { - report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); - } check_register_usage( ctx, - reg, + ind_reg, "indirect", FALSE ); } @@ -412,7 +408,7 @@ iter_declaration( uint vert; for (vert = 0; vert < ctx->implied_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); - fill_scan_register2d(reg, file, vert, i); + fill_scan_register2d(reg, file, i, vert); check_and_declare(ctx, reg); } } else { diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 9fcffeda368..7fe5dad5ffb 100644 --- 
a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -933,7 +933,8 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] = "NORMAL", "FACE", "EDGEFLAG", - "PRIM_ID" + "PRIM_ID", + "INSTANCEID" }; static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index e64e2b731df..ab557a23f94 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -40,6 +40,8 @@ union tgsi_any_token { struct tgsi_header header; struct tgsi_processor processor; struct tgsi_token token; + struct tgsi_property prop; + struct tgsi_property_data prop_data; struct tgsi_declaration decl; struct tgsi_declaration_range decl_range; struct tgsi_declaration_semantic decl_semantic; @@ -64,6 +66,7 @@ struct ureg_tokens { }; #define UREG_MAX_INPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 32 @@ -95,6 +98,13 @@ struct ureg_program unsigned nr_gs_inputs; struct { + unsigned index; + unsigned semantic_name; + unsigned semantic_index; + } system_value[UREG_MAX_SYSTEM_VALUE]; + unsigned nr_system_values; + + struct { unsigned semantic_name; unsigned semantic_index; } output[UREG_MAX_OUTPUT]; @@ -123,6 +133,8 @@ struct ureg_program } constant_range[UREG_MAX_CONSTANT_RANGE]; unsigned nr_constant_ranges; + unsigned property_gs_input_prim; + unsigned nr_addrs; unsigned nr_preds; unsigned nr_loops; @@ -234,19 +246,29 @@ ureg_src_register( unsigned file, src.SwizzleY = TGSI_SWIZZLE_Y; src.SwizzleZ = TGSI_SWIZZLE_Z; src.SwizzleW = TGSI_SWIZZLE_W; - src.Pad = 0; src.Indirect = 0; + src.IndirectFile = TGSI_FILE_NULL; src.IndirectIndex = 0; src.IndirectSwizzle = 0; src.Absolute = 0; src.Index = index; src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; return src; } +void +ureg_property_gs_input_prim(struct 
ureg_program *ureg, + unsigned gs_input_prim) +{ + ureg->property_gs_input_prim = gs_input_prim; +} + + struct ureg_src ureg_DECL_fs_input( struct ureg_program *ureg, @@ -304,6 +326,25 @@ ureg_DECL_gs_input(struct ureg_program *ureg, } +struct ureg_src +ureg_DECL_system_value(struct ureg_program *ureg, + unsigned index, + unsigned semantic_name, + unsigned semantic_index) +{ + if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { + ureg->system_value[ureg->nr_system_values].index = index; + ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; + ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; + ureg->nr_system_values++; + } else { + set_bad(ureg); + } + + return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); +} + + struct ureg_dst ureg_DECL_output( struct ureg_program *ureg, unsigned name, @@ -616,6 +657,35 @@ ureg_DECL_immediate_uint( struct ureg_program *ureg, struct ureg_src +ureg_DECL_immediate_block_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + uint index; + uint i; + + if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) { + set_bad(ureg); + return ureg_src_register(TGSI_FILE_IMMEDIATE, 0); + } + + index = ureg->nr_immediates; + ureg->nr_immediates += (nr + 3) / 4; + + for (i = index; i < ureg->nr_immediates; i++) { + ureg->immediate[i].type = TGSI_IMM_UINT32; + ureg->immediate[i].nr = nr > 4 ? 4 : nr; + memcpy(ureg->immediate[i].value.u, + &v[(i - index) * 4], + ureg->immediate[i].nr * sizeof(uint)); + nr -= 4; + } + + return ureg_src_register(TGSI_FILE_IMMEDIATE, index); +} + + +struct ureg_src ureg_DECL_immediate_int( struct ureg_program *ureg, const int *v, unsigned nr ) @@ -628,7 +698,7 @@ void ureg_emit_src( struct ureg_program *ureg, struct ureg_src src ) { - unsigned size = 1 + (src.Indirect ? 1 : 0); + unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 
1 : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; @@ -651,7 +721,7 @@ ureg_emit_src( struct ureg_program *ureg, if (src.Indirect) { out[0].src.Indirect = 1; out[n].value = 0; - out[n].src.File = TGSI_FILE_ADDRESS; + out[n].src.File = src.IndirectFile; out[n].src.SwizzleX = src.IndirectSwizzle; out[n].src.SwizzleY = src.IndirectSwizzle; out[n].src.SwizzleZ = src.IndirectSwizzle; @@ -660,6 +730,15 @@ ureg_emit_src( struct ureg_program *ureg, n++; } + if (src.Dimension) { + out[0].src.Dimension = 1; + out[n].dim.Indirect = 0; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + out[n].dim.Index = src.DimensionIndex; + n++; + } + assert(n == size); } @@ -1027,13 +1106,34 @@ emit_immediate( struct ureg_program *ureg, out[4].imm_data.Uint = v[3]; } +static void +emit_property(struct ureg_program *ureg, + unsigned name, + unsigned data) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); + + out[0].value = 0; + out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY; + out[0].prop.NrTokens = 2; + out[0].prop.PropertyName = name; + out[1].prop_data.Data = data; +} static void emit_decls( struct ureg_program *ureg ) { unsigned i; + if (ureg->property_gs_input_prim != ~0) { + assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); + + emit_property(ureg, + TGSI_PROPERTY_GS_INPUT_PRIM, + ureg->property_gs_input_prim); + } + if (ureg->processor == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < UREG_MAX_INPUT; i++) { if (ureg->vs_inputs[i/32] & (1 << (i%32))) { @@ -1058,6 +1158,15 @@ static void emit_decls( struct ureg_program *ureg ) } } + for (i = 0; i < ureg->nr_system_values; i++) { + emit_decl(ureg, + TGSI_FILE_SYSTEM_VALUE, + ureg->system_value[i].index, + ureg->system_value[i].semantic_name, + ureg->system_value[i].semantic_index, + TGSI_INTERPOLATE_CONSTANT); + } + for (i = 0; i < ureg->nr_outputs; i++) { emit_decl( ureg, TGSI_FILE_OUTPUT, @@ -1234,6 +1343,7 @@ struct ureg_program *ureg_create( unsigned processor ) return NULL; 
ureg->processor = processor; + ureg->property_gs_input_prim = ~0; return ureg; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 6f11273320a..8c8a6bbce66 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -47,13 +47,15 @@ struct ureg_src unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ - unsigned Pad : 1; /* BOOL */ unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ unsigned Absolute : 1; /* BOOL */ - int Index : 16; /* SINT */ unsigned Negate : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned IndirectFile : 4; /* TGSI_FILE_ */ int IndirectIndex : 16; /* SINT */ - int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + int DimensionIndex : 16; /* SINT */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -118,6 +120,14 @@ ureg_create_shader_and_destroy( struct ureg_program *p, } +/*********************************************************************** + * Build shader properties: + */ + +void +ureg_property_gs_input_prim(struct ureg_program *ureg, + unsigned gs_input_prim); + /*********************************************************************** * Build shader declarations: @@ -137,6 +147,12 @@ struct ureg_src ureg_DECL_gs_input(struct ureg_program *, unsigned index); +struct ureg_src +ureg_DECL_system_value(struct ureg_program *, + unsigned index, + unsigned semantic_name, + unsigned semantic_index); + struct ureg_dst ureg_DECL_output( struct ureg_program *, unsigned semantic_name, @@ -153,6 +169,11 @@ ureg_DECL_immediate_uint( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_block_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src ureg_DECL_immediate_int( struct ureg_program *, const int *v, unsigned nr ); @@ -753,18 +774,30 @@ static 
INLINE struct ureg_src ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) { assert(reg.File != TGSI_FILE_NULL); - assert(addr.File == TGSI_FILE_ADDRESS); + assert(addr.File == TGSI_FILE_ADDRESS || addr.File == TGSI_FILE_TEMPORARY); reg.Indirect = 1; + reg.IndirectFile = addr.File; reg.IndirectIndex = addr.Index; reg.IndirectSwizzle = addr.SwizzleX; return reg; } +static INLINE struct ureg_src +ureg_src_dimension( struct ureg_src reg, int index ) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Dimension = 1; + reg.DimensionIndex = index; + return reg; +} + static INLINE struct ureg_dst ureg_dst( struct ureg_src src ) { struct ureg_dst dst; + assert(!src.Indirect || src.IndirectFile == TGSI_FILE_ADDRESS); + dst.File = src.File; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = src.Indirect; @@ -792,13 +825,15 @@ ureg_src( struct ureg_dst dst ) src.SwizzleY = TGSI_SWIZZLE_Y; src.SwizzleZ = TGSI_SWIZZLE_Z; src.SwizzleW = TGSI_SWIZZLE_W; - src.Pad = 0; src.Indirect = dst.Indirect; + src.IndirectFile = TGSI_FILE_ADDRESS; src.IndirectIndex = dst.IndirectIndex; src.IndirectSwizzle = dst.IndirectSwizzle; src.Absolute = 0; src.Index = dst.Index; src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; return src; } @@ -837,13 +872,15 @@ ureg_src_undef( void ) src.SwizzleY = 0; src.SwizzleZ = 0; src.SwizzleW = 0; - src.Pad = 0; src.Indirect = 0; + src.IndirectFile = TGSI_FILE_NULL; src.IndirectIndex = 0; src.IndirectSwizzle = 0; src.Absolute = 0; src.Index = 0; src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; return src; } diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 34526eb0617..54ed2c1a4be 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -44,12 +44,19 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" +enum translate_element_type { + TRANSLATE_ELEMENT_NORMAL, + TRANSLATE_ELEMENT_INSTANCE_ID +}; + struct 
translate_element { + enum translate_element_type type; enum pipe_format input_format; enum pipe_format output_format; unsigned input_buffer:8; unsigned input_offset:24; + unsigned instance_divisor; unsigned output_offset; }; @@ -74,11 +81,13 @@ struct translate { void (PIPE_CDECL *run_elts)( struct translate *, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer); void (PIPE_CDECL *run)( struct translate *, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer); }; @@ -103,8 +112,13 @@ static INLINE int translate_keysize( const struct translate_key *key ) static INLINE int translate_key_compare( const struct translate_key *a, const struct translate_key *b ) { - int keysize = translate_keysize(a); - return memcmp(a, b, keysize); + int keysize_a = translate_keysize(a); + int keysize_b = translate_keysize(b); + + if (keysize_a != keysize_b) { + return keysize_a - keysize_b; + } + return memcmp(a, b, keysize_a); } diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 266e7ee81e6..24727d49888 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -46,9 +46,12 @@ struct translate_generic { struct translate translate; struct { + enum translate_element_type type; + fetch_func fetch; unsigned buffer; unsigned input_offset; + unsigned instance_divisor; emit_func emit; unsigned output_offset; @@ -568,6 +571,7 @@ static emit_func get_emit_func( enum pipe_format format ) static void PIPE_CDECL generic_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -583,13 +587,20 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - const char *src = (tg->attrib[attr].input_ptr + - 
tg->attrib[attr].input_stride * elt); + const char *src; char *dst = (vert + tg->attrib[attr].output_offset); + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + tg->attrib[attr].fetch( src, data ); if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", @@ -607,6 +618,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, static void PIPE_CDECL generic_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -623,13 +635,25 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); - char *dst = (vert + tg->attrib[attr].output_offset); - tg->attrib[attr].fetch( src, data ); + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const char *src; + + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + + tg->attrib[attr].fetch( src, data ); + } else { + data[0] = (float)instance_id; + } if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", i, attr, data[0], data[1], data[2], data[3]); @@ -683,10 +707,12 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->translate.run = generic_run; for (i = 0; i < key->nr_elements; i++) { + tg->attrib[i].type = key->element[i].type; tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); tg->attrib[i].buffer = key->element[i].input_buffer; tg->attrib[i].input_offset = 
key->element[i].input_offset; + tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); tg->attrib[i].output_offset = key->element[i].output_offset; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index b62db8d8f33..c13e7427387 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -49,19 +49,29 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, unsigned start, unsigned count, - void *output_buffer ); + unsigned instance_id, + void *output_buffer); typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, const unsigned *elts, unsigned count, - void *output_buffer ); + unsigned instance_id, + void *output_buffer); struct translate_buffer { const void *base_ptr; unsigned stride; - void *ptr; /* updated per vertex */ }; +struct translate_buffer_varient { + unsigned buffer_index; + unsigned instance_divisor; + void *ptr; /* updated either per vertex or per instance */ +}; + + +#define ELEMENT_BUFFER_INSTANCE_ID 1001 + struct translate_sse { struct translate translate; @@ -81,6 +91,16 @@ struct translate_sse { struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; unsigned nr_buffers; + /* Multiple buffer varients can map to a single buffer. */ + struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS]; + unsigned nr_buffer_varients; + + /* Multiple elements can map to a single buffer varient. 
*/ + unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS]; + + boolean use_instancing; + unsigned instance_id; + run_func gen_run; run_elts_func gen_run_elts; @@ -359,32 +379,61 @@ static boolean init_inputs( struct translate_sse *p, boolean linear ) { unsigned i; - if (linear) { - for (i = 0; i < p->nr_buffers; i++) { + struct x86_reg instance_id = x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); + + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; + struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; + + if (linear || varient->instance_divisor) { struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &buffer->stride)); struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].base_ptr)); + get_offset(p, &buffer->base_ptr)); struct x86_reg elt = p->idx_EBX; - struct x86_reg tmp = p->tmp_EAX; - + struct x86_reg tmp_EAX = p->tmp_EAX; /* Calculate pointer to first attrib: + * base_ptr + stride * index, where index depends on instance divisor */ - x86_mov(p->func, tmp, buf_stride); - x86_imul(p->func, tmp, elt); - x86_add(p->func, tmp, buf_base_ptr); + if (varient->instance_divisor) { + /* Our index is instance ID divided by instance divisor. + */ + x86_mov(p->func, tmp_EAX, instance_id); + + if (varient->instance_divisor != 1) { + struct x86_reg tmp_EDX = p->machine_EDX; + struct x86_reg tmp_ECX = p->outbuf_ECX; + + /* TODO: Add x86_shr() to rtasm and use it whenever + * instance divisor is power of two. 
+ */ + + x86_push(p->func, tmp_EDX); + x86_push(p->func, tmp_ECX); + x86_xor(p->func, tmp_EDX, tmp_EDX); + x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor); + x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ + x86_pop(p->func, tmp_ECX); + x86_pop(p->func, tmp_EDX); + } + } else { + x86_mov(p->func, tmp_EAX, elt); + } + x86_imul(p->func, tmp_EAX, buf_stride); + x86_add(p->func, tmp_EAX, buf_base_ptr); /* In the linear case, keep the buffer pointer instead of the * index number. */ - if (p->nr_buffers == 1) - x86_mov( p->func, elt, tmp ); + if (linear && p->nr_buffer_varients == 1) + x86_mov(p->func, elt, tmp_EAX); else - x86_mov( p->func, buf_ptr, tmp ); + x86_mov(p->func, buf_ptr, tmp_EAX); } } @@ -394,31 +443,36 @@ static boolean init_inputs( struct translate_sse *p, static struct x86_reg get_buffer_ptr( struct translate_sse *p, boolean linear, - unsigned buf_idx, + unsigned var_idx, struct x86_reg elt ) { - if (linear && p->nr_buffers == 1) { + if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { + return x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); + } + if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; } - else if (linear) { + else if (linear || p->buffer_varient[var_idx].instance_divisor) { struct x86_reg ptr = p->tmp_EAX; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].ptr)); + get_offset(p, &p->buffer_varient[var_idx].ptr)); x86_mov(p->func, ptr, buf_ptr); return ptr; } else { struct x86_reg ptr = p->tmp_EAX; + const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx]; struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].base_ptr)); + get_offset(p, &p->buffer[varient->buffer_index].base_ptr)); @@ -436,28 +490,33 @@ static struct x86_reg get_buffer_ptr( 
struct translate_sse *p, static boolean incr_inputs( struct translate_sse *p, boolean linear ) { - if (linear && p->nr_buffers == 1) { + if (linear && p->nr_buffer_varients == 1) { struct x86_reg stride = x86_make_disp(p->machine_EDX, get_offset(p, &p->buffer[0].stride)); - x86_add(p->func, p->idx_EBX, stride); - sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + if (p->buffer_varient[0].instance_divisor == 0) { + x86_add(p->func, p->idx_EBX, stride); + sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + } } else if (linear) { unsigned i; /* Is this worthwhile?? */ - for (i = 0; i < p->nr_buffers; i++) { + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); - x86_mov(p->func, p->tmp_EAX, buf_ptr); - x86_add(p->func, p->tmp_EAX, buf_stride); - if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); - x86_mov(p->func, buf_ptr, p->tmp_EAX); + if (varient->instance_divisor == 0) { + x86_mov(p->func, p->tmp_EAX, buf_ptr); + x86_add(p->func, p->tmp_EAX, buf_stride); + if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); + x86_mov(p->func, buf_ptr, p->tmp_EAX); + } } } else { @@ -514,7 +573,18 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1)); x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2)); x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3)); - x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 4)); + x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5)); + + /* Load instance ID. 
+ */ + if (p->use_instancing) { + x86_mov(p->func, + p->tmp_EAX, + x86_fn_arg(p->func, 4)); + x86_mov(p->func, + x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), + p->tmp_EAX); + } /* Get vertex count, compare to zero */ @@ -531,17 +601,18 @@ static boolean build_vertex_emit( struct translate_sse *p, label = x86_get_label(p->func); { struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX); - int last_vb = -1; + int last_varient = -1; struct x86_reg vb; for (j = 0; j < p->translate.key.nr_elements; j++) { const struct translate_element *a = &p->translate.key.element[j]; + unsigned varient = p->element_to_buffer_varient[j]; /* Figure out source pointer address: */ - if (a->input_buffer != last_vb) { - last_vb = a->input_buffer; - vb = get_buffer_ptr(p, linear, a->input_buffer, elt); + if (varient != last_varient) { + last_varient = varient; + vb = get_buffer_ptr(p, linear, varient, elt); } if (!translate_attr( p, a, @@ -624,6 +695,7 @@ static void translate_sse_release( struct translate *translate ) static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; @@ -631,12 +703,14 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, p->gen_run_elts( translate, elts, count, - output_buffer ); + instance_id, + output_buffer); } static void PIPE_CDECL translate_sse_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; @@ -644,7 +718,8 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, p->gen_run( translate, start, count, - output_buffer ); + instance_id, + output_buffer); } @@ -666,8 +741,37 @@ struct translate *translate_sse2_create( const struct translate_key *key ) p->translate.run_elts = translate_sse_run_elts; 
p->translate.run = translate_sse_run; - for (i = 0; i < key->nr_elements; i++) - p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 ); + for (i = 0; i < key->nr_elements; i++) { + if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) { + unsigned j; + + p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); + + if (key->element[i].instance_divisor) { + p->use_instancing = TRUE; + } + + /* + * Map vertex element to vertex buffer varient. + */ + for (j = 0; j < p->nr_buffer_varients; j++) { + if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && + p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { + break; + } + } + if (j == p->nr_buffer_varients) { + p->buffer_varient[j].buffer_index = key->element[i].input_buffer; + p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; + p->nr_buffer_varients++; + } + p->element_to_buffer_varient[i] = j; + } else { + assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); + + p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; + } + } if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 3f74e2aa8b8..9725890bd4a 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -262,6 +262,10 @@ regions_overlap(int srcX0, int srcY0, * Copy pixel block from src surface to dst surface. * Overlapping regions are acceptable. * Flipping and stretching are supported. + * \param filter one of PIPE_TEX_MIPFILTER_NEAREST/LINEAR + * \param writemask controls which channels in the dest surface are sourced + * from the src surface. Disabled channels are sourced + * from (0,0,0,1). * XXX what about clipping??? * XXX need some control over blitting Z and/or stencil. 
*/ diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 46b4706b768..249a0375fc5 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -379,9 +379,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, float t1 = y1 / (float)surf->height; float s2 = x2 / (float)surf->width; float t2 = y2 / (float)surf->height; - const float st[4][2] = { - {s1, t1}, {s2, t1}, {s2, t2}, {s1, t2} - }; + float st[4][2]; + + st[0][0] = s1; + st[0][1] = t1; + st[1][0] = s2; + st[1][1] = t1; + st[2][0] = s2; + st[2][1] = t2; + st[3][0] = s1; + st[3][1] = t2; util_map_texcoords2d_onto_cubemap(surf->face, /* pointer, stride in floats */ diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 4110485fb19..e2e23c3cdd8 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -61,6 +61,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* tell pipe about the vertex attributes */ for (i = 0; i < num_attribs; i++) { velements[i].src_offset = i * 4 * sizeof(float); + velements[i].instance_divisor = 0; velements[i].vertex_buffer_index = 0; velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; velements[i].nr_components = 4; diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 9f16b42944e..01f7931aed1 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -62,10 +62,10 @@ PIPE_FORMAT_R16G16_SSCALED , array , 1, 1, s16 , s16 , , , xy01, PIPE_FORMAT_R16G16B16_SSCALED , array , 1, 1, s16 , s16 , s16 , , xyz1, rgb PIPE_FORMAT_R16G16B16A16_SSCALED , array , 1, 1, s16 , s16 , s16 , s16 , xyzw, rgb PIPE_FORMAT_R8_UNORM , array , 1, 1, un8 , , , , x001, rgb -PIPE_FORMAT_R8G8_UNORM , array , 1, 1, un8 , un8 , , , xy01, rgb -PIPE_FORMAT_R8G8B8_UNORM , array , 1, 1, un8 , un8 , un8 , , xyz1, rgb 
-PIPE_FORMAT_R8G8B8A8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb -PIPE_FORMAT_R8G8B8X8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , xyz1, rgb +PIPE_FORMAT_R8G8_UNORM , array , 1, 1, un8 , un8 , , , yx01, rgb +PIPE_FORMAT_R8G8B8_UNORM , array , 1, 1, un8 , un8 , un8 , , zyx1, rgb +PIPE_FORMAT_R8G8B8A8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb +PIPE_FORMAT_R8G8B8X8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , wzy1, rgb PIPE_FORMAT_R8_USCALED , array , 1, 1, u8 , , , , x001, rgb PIPE_FORMAT_R8G8_USCALED , array , 1, 1, u8 , u8 , , , xy01, rgb PIPE_FORMAT_R8G8B8_USCALED , array , 1, 1, u8 , u8 , u8 , , xyz1, rgb diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 43eb0153ee7..0ab53c75dd6 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -425,6 +425,8 @@ util_pack_z(enum pipe_format format, double z) if (z == 1.0) return 0xffffffff; return (uint) (z * 0xffffffff); + case PIPE_FORMAT_Z32_FLOAT: + return (uint)z; case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: if (z == 1.0) diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c b/src/gallium/auxiliary/util/u_ringbuffer.c new file mode 100644 index 00000000000..3f43a19e018 --- /dev/null +++ b/src/gallium/auxiliary/util/u_ringbuffer.c @@ -0,0 +1,145 @@ + +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "util/u_ringbuffer.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +/* Generic ringbuffer: + */ +struct util_ringbuffer +{ + struct util_packet *buf; + unsigned mask; + + /* Can this be done with atomic variables?? 
+ */ + unsigned head; + unsigned tail; + pipe_condvar change; + pipe_mutex mutex; +}; + + +struct util_ringbuffer *util_ringbuffer_create( unsigned dwords ) +{ + struct util_ringbuffer *ring = CALLOC_STRUCT(util_ringbuffer); + if (ring == NULL) + return NULL; + + assert(util_is_power_of_two(dwords)); + + ring->buf = MALLOC( dwords * sizeof(unsigned) ); + if (ring->buf == NULL) + goto fail; + + ring->mask = dwords - 1; + + pipe_condvar_init(ring->change); + pipe_mutex_init(ring->mutex); + return ring; + +fail: + FREE(ring->buf); + FREE(ring); + return NULL; +} + +void util_ringbuffer_destroy( struct util_ringbuffer *ring ) +{ + pipe_condvar_destroy(ring->change); + pipe_mutex_destroy(ring->mutex); + FREE(ring->buf); + FREE(ring); +} + +static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring ) +{ + return (ring->tail - (ring->head + 1)) & ring->mask; +} + +void util_ringbuffer_enqueue( struct util_ringbuffer *ring, + const struct util_packet *packet ) +{ + unsigned i; + + /* XXX: over-reliance on mutexes, etc: + */ + pipe_mutex_lock(ring->mutex); + + /* Wait for free space: + */ + while (util_ringbuffer_space(ring) < packet->dwords) + pipe_condvar_wait(ring->change, ring->mutex); + + /* Copy data to ring: + */ + for (i = 0; i < packet->dwords; i++) { + + /* Copy all dwords of the packet. 
Note we're abusing the + * typesystem a little - we're being passed a pointer to + * something, but probably not an array of packet structs: + */ + ring->buf[ring->head] = packet[i]; + ring->head++; + ring->head &= ring->mask; + } + + /* Signal change: + */ + pipe_condvar_signal(ring->change); + pipe_mutex_unlock(ring->mutex); +} + +enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, + struct util_packet *packet, + unsigned max_dwords, + boolean wait ) +{ + const struct util_packet *ring_packet; + unsigned i; + int ret = PIPE_OK; + + /* XXX: over-reliance on mutexes, etc: + */ + pipe_mutex_lock(ring->mutex); + + /* Wait for free space: + */ + if (wait) { + while (util_ringbuffer_space(ring) == 0) + pipe_condvar_wait(ring->change, ring->mutex); + } + else { + if (util_ringbuffer_space(ring) == 0) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + } + + ring_packet = &ring->buf[ring->tail]; + + /* Both of these are considered bugs. Raise an assert on debug builds. + */ + if (ring_packet->dwords > ring->mask + 1 - util_ringbuffer_space(ring) || + ring_packet->dwords > max_dwords) { + assert(0); + ret = PIPE_ERROR_BAD_INPUT; + goto out; + } + + /* Copy data from ring: + */ + for (i = 0; i < ring_packet->dwords; i++) { + packet[i] = ring->buf[ring->tail]; + ring->tail++; + ring->tail &= ring->mask; + } + +out: + /* Signal change: + */ + pipe_condvar_signal(ring->change); + pipe_mutex_unlock(ring->mutex); + return ret; +} diff --git a/src/gallium/auxiliary/util/u_ringbuffer.h b/src/gallium/auxiliary/util/u_ringbuffer.h new file mode 100644 index 00000000000..85f0ad6c1f6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_ringbuffer.h @@ -0,0 +1,29 @@ + +#ifndef UTIL_RINGBUFFER_H +#define UTIL_RINGBUFFER_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" /* only for pipe_error! 
*/ + +/* Generic header + */ +struct util_packet { + unsigned dwords:8; + unsigned data24:24; +}; + +struct util_ringbuffer; + +struct util_ringbuffer *util_ringbuffer_create( unsigned dwords ); + +void util_ringbuffer_destroy( struct util_ringbuffer *ring ); + +void util_ringbuffer_enqueue( struct util_ringbuffer *ring, + const struct util_packet *packet ); + +enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, + struct util_packet *packet, + unsigned max_dwords, + boolean wait ); + +#endif diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 1ba82bb21f0..f9936eb1cb2 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -1357,7 +1357,10 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); + util_format_write_4f(format, + p, src_stride * sizeof(float), + packed, util_format_get_stride(format, w), + 0, 0, w, h); } pipe_put_tile_raw(pt, x, y, w, h, packed, 0); diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index fc2a1c59a6b..a524e2fdfb3 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -316,6 +316,7 @@ init_buffers(struct vl_compositor *c) pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer); c->vertex_elems[0].src_offset = 0; + c->vertex_elems[0].instance_divisor = 0; c->vertex_elems[0].vertex_buffer_index = 0; c->vertex_elems[0].nr_components = 2; c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; @@ -345,6 +346,7 @@ init_buffers(struct vl_compositor *c) pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer); c->vertex_elems[1].src_offset = 0; + c->vertex_elems[1].instance_divisor = 0; c->vertex_elems[1].vertex_buffer_index = 1; c->vertex_elems[1].nr_components = 2; 
c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; @@ -353,7 +355,7 @@ init_buffers(struct vl_compositor *c) * Create our vertex shader's constant buffer * Const buffer contains scaling and translation vectors */ - c->vs_const_buf.buffer = pipe_buffer_create + c->vs_const_buf = pipe_buffer_create ( c->pipe->screen, 1, @@ -365,7 +367,7 @@ init_buffers(struct vl_compositor *c) * Create our fragment shader's constant buffer * Const buffer contains the color conversion matrix and bias vectors */ - c->fs_const_buf.buffer = pipe_buffer_create + c->fs_const_buf = pipe_buffer_create ( c->pipe->screen, 1, @@ -390,8 +392,8 @@ cleanup_buffers(struct vl_compositor *c) for (i = 0; i < 2; ++i) pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL); - pipe_buffer_reference(&c->vs_const_buf.buffer, NULL); - pipe_buffer_reference(&c->fs_const_buf.buffer, NULL); + pipe_buffer_reference(&c->vs_const_buf, NULL); + pipe_buffer_reference(&c->fs_const_buf, NULL); } bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe) @@ -483,13 +485,13 @@ void vl_compositor_render(struct vl_compositor *compositor, compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader); compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs); compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems); - compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf); - compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf); vs_consts = pipe_buffer_map ( compositor->pipe->screen, - compositor->vs_const_buf.buffer, + compositor->vs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); @@ 
-511,7 +513,7 @@ void vl_compositor_render(struct vl_compositor *compositor, vs_consts->src_trans.z = 0; vs_consts->src_trans.w = 0; - pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer); + pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf); compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence); @@ -525,10 +527,10 @@ void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float memcpy ( - pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE), mat, sizeof(struct fragment_shader_consts) ); - pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf.buffer); + pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf); } diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index f441901a751..6a9a3fd7af1 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -47,7 +47,7 @@ struct vl_compositor struct pipe_scissor_state scissor; struct pipe_vertex_buffer vertex_bufs[2]; struct pipe_vertex_element vertex_elems[2]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; + struct pipe_buffer *vs_const_buf, *fs_const_buf; }; bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index caf581aca60..e43187545c5 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -891,53 +891,61 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) /* Position element */ r->vertex_elems[0].src_offset = 0; + r->vertex_elems[0].instance_divisor = 0; 
r->vertex_elems[0].vertex_buffer_index = 0; r->vertex_elems[0].nr_components = 2; r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Luma, texcoord element */ r->vertex_elems[1].src_offset = sizeof(struct vertex2f); + r->vertex_elems[1].instance_divisor = 0; r->vertex_elems[1].vertex_buffer_index = 0; r->vertex_elems[1].nr_components = 2; r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Chroma Cr texcoord element */ r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2; + r->vertex_elems[2].instance_divisor = 0; r->vertex_elems[2].vertex_buffer_index = 0; r->vertex_elems[2].nr_components = 2; r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Chroma Cb texcoord element */ r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3; + r->vertex_elems[3].instance_divisor = 0; r->vertex_elems[3].vertex_buffer_index = 0; r->vertex_elems[3].nr_components = 2; r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; /* First ref surface top field texcoord element */ r->vertex_elems[4].src_offset = 0; + r->vertex_elems[4].instance_divisor = 0; r->vertex_elems[4].vertex_buffer_index = 1; r->vertex_elems[4].nr_components = 2; r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; /* First ref surface bottom field texcoord element */ r->vertex_elems[5].src_offset = sizeof(struct vertex2f); + r->vertex_elems[5].instance_divisor = 0; r->vertex_elems[5].vertex_buffer_index = 1; r->vertex_elems[5].nr_components = 2; r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Second ref surface top field texcoord element */ r->vertex_elems[6].src_offset = 0; + r->vertex_elems[6].instance_divisor = 0; r->vertex_elems[6].vertex_buffer_index = 2; r->vertex_elems[6].nr_components = 2; r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Second ref surface bottom field texcoord element */ r->vertex_elems[7].src_offset = sizeof(struct vertex2f); + r->vertex_elems[7].instance_divisor = 0; r->vertex_elems[7].vertex_buffer_index = 2; 
r->vertex_elems[7].nr_components = 2; r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; - r->vs_const_buf.buffer = pipe_buffer_create + r->vs_const_buf = pipe_buffer_create ( r->pipe->screen, DEFAULT_BUF_ALIGNMENT, @@ -945,7 +953,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) sizeof(struct vertex_shader_consts) ); - r->fs_const_buf.buffer = pipe_buffer_create + r->fs_const_buf = pipe_buffer_create ( r->pipe->screen, DEFAULT_BUF_ALIGNMENT, @@ -954,11 +962,11 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) memcpy ( - pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(r->pipe->screen, r->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE), &fs_consts, sizeof(struct fragment_shader_consts) ); - pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer); + pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf); return true; } @@ -970,8 +978,8 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r) assert(r); - pipe_buffer_reference(&r->vs_const_buf.buffer, NULL); - pipe_buffer_reference(&r->fs_const_buf.buffer, NULL); + pipe_buffer_reference(&r->vs_const_buf, NULL); + pipe_buffer_reference(&r->fs_const_buf, NULL); for (i = 0; i < 3; ++i) pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL); @@ -1284,19 +1292,19 @@ flush(struct vl_mpeg12_mc_renderer *r) vs_consts = pipe_buffer_map ( - r->pipe->screen, r->vs_const_buf.buffer, + r->pipe->screen, r->vs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); vs_consts->denorm.x = r->surface->width0; vs_consts->denorm.y = r->surface->height0; - pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer); + pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf); r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, - &r->vs_const_buf); + r->vs_const_buf); r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0, - &r->fs_const_buf); + r->fs_const_buf); if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) { r->pipe->set_vertex_buffers(r->pipe, 1, 
r->vertex_bufs.all); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h index 64184337a06..f00b8c7b8b1 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -63,8 +63,8 @@ struct vl_mpeg12_mc_renderer struct pipe_viewport_state viewport; struct pipe_scissor_state scissor; - struct pipe_constant_buffer vs_const_buf; - struct pipe_constant_buffer fs_const_buf; + struct pipe_buffer *vs_const_buf; + struct pipe_buffer *fs_const_buf; struct pipe_framebuffer_state fb_state; struct pipe_vertex_element vertex_elems[8]; diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 21f5f9111a0..d394f5b4f1b 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -33,7 +33,11 @@ This state describes how resources in various flavours (textures, buffers, surfaces) are bound to the driver. -* ``set_constant_buffer`` +* ``set_constant_buffer`` sets a constant buffer to be used for a given shader + type. index is used to indicate which buffer to set (some apis may allow + multiple ones to be set, and binding a specific one later, though drivers + are mostly restricted to the first one right now). + * ``set_framebuffer_state`` * ``set_fragment_sampler_textures`` * ``set_vertex_sampler_textures`` @@ -47,7 +51,6 @@ These pieces of state are too small, variable, and/or trivial to have CSO objects. They all follow simple, one-method binding calls, e.g. ``set_edgeflags``. -* ``set_edgeflags`` * ``set_blend_color`` * ``set_clip_state`` * ``set_polygon_stipple`` @@ -72,12 +75,67 @@ stencil-only clears of packed depth-stencil buffers. Drawing ^^^^^^^ -``draw_arrays`` +``draw_arrays`` draws a specified primitive. + +This command is equivalent to calling ``draw_arrays_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. 
+ +``draw_elements`` draws a specified primitive using an optional +index buffer. -``draw_elements`` +This command is equivalent to calling ``draw_elements_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. ``draw_range_elements`` +XXX: this is (probably) a temporary entrypoint, as the range +information should be available from the vertex_buffer state. +Using this to quickly evaluate a specialized path in the draw +module. + +``draw_arrays_instanced`` draws multiple instances of the same primitive. + +This command is equivalent to calling ``draw_elements_instanced`` +with ``indexBuffer`` set to NULL and ``indexSize`` set to 0. + +``draw_elements_instanced`` draws multiple instances of the same primitive +using an optional index buffer. + +For instanceID in the range between ``startInstance`` +and ``startInstance``+``instanceCount``-1, inclusive, draw a primitive +specified by ``mode`` and sequential numbers in the range between ``start`` +and ``start``+``count``-1, inclusive. + +If ``indexBuffer`` is not NULL, it specifies an index buffer with index +byte size of ``indexSize``. The sequential numbers are used to look up +the index buffer and the resulting indices in turn are used to fetch +vertex attributes. + +If ``indexBuffer`` is NULL, the sequential numbers are used directly +as indices to fetch vertex attributes. + +If a given vertex element has ``instance_divisor`` set to 0, it is said +it contains per-vertex data and effective vertex attribute address needs +to be recalculated for every index. + + attribAddr = ``stride`` * index + ``src_offset`` + +If a given vertex element has ``instance_divisor`` set to non-zero, +it is said it contains per-instance data and effective vertex attribute +address needs to be recalculated for every ``instance_divisor``-th instance.
+ + attribAddr = ``stride`` * instanceID / ``instance_divisor`` + ``src_offset`` + +In the above formulas, ``src_offset`` is taken from the given vertex element +and ``stride`` is taken from a vertex buffer associated with the given +vertex element. + +The calculated attribAddr is used as an offset into the vertex buffer to +fetch the attribute data. + +The value of ``instanceID`` can be read in a vertex shader through a system +value register declared with INSTANCEID semantic name. + Queries ^^^^^^^ diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst index 4d8e1708e7c..bfa4a1170a2 100644 --- a/src/gallium/docs/source/cso/rasterizer.rst +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -7,32 +7,69 @@ The rasterizer state controls the rendering of points, lines and triangles. Attributes include polygon culling state, line width, line stipple, multisample state, scissoring and flat/smooth shading. - Members ------- +bypass_vs_clip_and_viewport +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Whether the entire TCL pipeline should be bypassed. This implies that +vertices are pre-transformed for the viewport, and will not be run +through the vertex shader. + +.. note:: + + Implementations may still clip away vertices that are not in the viewport + when this is set. + flatshade - If set, the provoking vertex of each polygon is used to determine the - color of the entire polygon. If not set, fragment colors will be - interpolated between the vertex colors. - Note that this is separate from the fragment shader input attributes - CONSTANT, LINEAR and PERSPECTIVE. We need the flatshade state at +^^^^^^^^^ + +If set, the provoking vertex of each polygon is used to determine the color +of the entire polygon. If not set, fragment colors will be interpolated +between the vertex colors. + +The actual interpolated shading algorithm is obviously +implementation-dependent, but will usually be Gouraud for most hardware. + +..
note:: + + This is separate from the fragment shader input attributes + CONSTANT, LINEAR and PERSPECTIVE. The flatshade state is needed at clipping time to determine how to set the color of new vertices. - Also note that the draw module can implement flat shading by copying - the provoking vertex color to all the other vertices in the primitive. + + :ref:`Draw` can implement flat shading by copying the provoking vertex + color to all the other vertices in the primitive. flatshade_first - Whether the first vertex should be the provoking vertex, for most - primitives. If not set, the last vertex is the provoking vertex. +^^^^^^^^^^^^^^^ + +Whether the first vertex should be the provoking vertex, for most primitives. +If not set, the last vertex is the provoking vertex. + +There are several important exceptions to the specification of this rule. + +* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first + vertex. If the caller wishes to change the provoking vertex, they merely + need to rotate the vertices themselves. +* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no + effect; the provoking vertex is always the last vertex. +* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the + second vertex, not the first. This permits each segment of the fan to have + a different color. + +Other Members +^^^^^^^^^^^^^ light_twoside - If set, there are per-vertex back-facing colors. The draw module + If set, there are per-vertex back-facing colors. :ref:`Draw` uses this state along with the front/back information to set the final vertex colors prior to rasterization. 
front_winding Indicates the window order of front-facing polygons, either PIPE_WINDING_CW or PIPE_WINDING_CCW + cull_mode Indicates which polygons to cull, either PIPE_WINDING_NONE (cull no polygons), PIPE_WINDING_CW (cull clockwise-winding polygons), @@ -68,7 +105,7 @@ line_stipple_enable line_stipple_pattern 16-bit bitfield of on/off flags, used to pattern the line stipple. line_stipple_factor - When drawinga stippled line, each bit in the stipple pattern is + When drawing a stippled line, each bit in the stipple pattern is repeated N times, where N = line_stipple_factor + 1. line_last_pixel Controls whether the last pixel in a line is drawn or not. OpenGL @@ -96,7 +133,7 @@ sprite_coord_mode lower left vertex will have coordinate (0,0,0,1). For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left vertex will have coordinate (0,0,0,1). - This state is needed by the 'draw' module because that's where each + This state is needed by :ref:`Draw` because that's where each point vertex is converted into four quad vertices. There's no other place to emit the new vertex texture coordinates which are required for sprite rendering. @@ -108,45 +145,9 @@ scissor Whether the scissor test is enabled. multisample - Whether :ref:`MSAA` is enabled. - -bypass_vs_clip_and_viewport - Whether the entire TCL pipeline should be bypassed. This implies that - vertices are pre-transformed for the viewport, and will not be run - through the vertex shader. Note that implementations may still clip away - vertices that are not in the viewport. + Whether :term:`MSAA` is enabled. gl_rasterization_rules Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set, the rasterizer will use (0, 0) for pixel centers. - -Notes ------ - -flatshade -^^^^^^^^^ - -The actual interpolated shading algorithm is obviously -implementation-dependent, but will usually be Gourard for most hardware. 
- -bypass_vs_clip_and_viewport -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When set, this implies that vertices are pre-transformed for the viewport, and -will not be run through the vertex shader. Note that implementations may still -clip away vertices that are not visible. - -flatshade_first -^^^^^^^^^^^^^^^ - -There are several important exceptions to the specification of this rule. - -* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first - vertex. If the caller wishes to change the provoking vertex, they merely - need to rotate the vertices themselves. -* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no - effect; the provoking vertex is always the last vertex. -* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the - second vertex, not the first. This permits each segment of the fan to have - a different color. diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst index 33e846e33d2..0ef9fe2645c 100644 --- a/src/gallium/docs/source/distro.rst +++ b/src/gallium/docs/source/distro.rst @@ -61,10 +61,7 @@ VMWare SVGA ATI r300 ^^^^^^^^ -AMD/ATI r600 -^^^^^^^^^^^^ - -Highly experimental. +Testing-quality. Softpipe ^^^^^^^^ @@ -109,17 +106,31 @@ Auxiliary CSO Cache ^^^^^^^^^ +The CSO cache is used to accelerate preparation of state by saving +driver-specific state structures for later use. + +.. _draw: + Draw ^^^^ +Draw is a software :term:`TCL` pipeline for hardware that lacks vertex shaders +or other essential parts of pre-rasterization vertex preparation. + Gallivm ^^^^^^^ Indices ^^^^^^^ -Pipe Buffer Manager -^^^^^^^^^^^^^^^^^^^ +Indices provides tools for translating or generating element indices for +use with element-based rendering. + +Pipe Buffer Managers +^^^^^^^^^^^^^^^^^^^^ + +Each of these managers provides various services to drivers that are not +fully utilizing a memory manager. 
Remote Debugger ^^^^^^^^^^^^^^^ @@ -127,12 +138,12 @@ Remote Debugger Runtime Assembly Emission ^^^^^^^^^^^^^^^^^^^^^^^^^ -Surface Context Tracker -^^^^^^^^^^^^^^^^^^^^^^^ - TGSI ^^^^ +The TGSI auxiliary module provides basic utilities for manipulating TGSI +streams. + Translate ^^^^^^^^^ diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst index 6a9110ce786..aec89f8b5c4 100644 --- a/src/gallium/docs/source/glossary.rst +++ b/src/gallium/docs/source/glossary.rst @@ -8,3 +8,8 @@ Glossary Multi-Sampled Anti-Aliasing. A basic anti-aliasing technique that takes multiple samples of the depth buffer, and uses this information to smooth the edges of polygons. + + TCL + Transform, Clipping, & Lighting. The three stages of preparation in a + rasterizing pipeline prior to the actual rasterization of vertices into + fragments. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 9631e6967ef..72bb75a55dc 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -3,6 +3,28 @@ Screen A screen is an object representing the context-independent part of a device. +Useful Flags +------------ + +.. _pipe_texture_usage: + +PIPE_TEXTURE_USAGE +^^^^^^^^^^^^^^^^^^ + +These flags determine the possible roles a texture may be used for during its +lifetime. Texture usage flags are cumulative and may be combined to create a +texture that can be used as multiple things. + +* ``RENDER_TARGET``: A colorbuffer or pixelbuffer. +* ``DISPLAY_TARGET``: A sharable buffer that can be given to another process. +* ``PRIMARY``: A frontbuffer or scanout buffer. +* ``DEPTH_STENCIL``: A depthbuffer, stencilbuffer, or Z buffer. Gallium does + not explicitly provide for stencil-only buffers, so any stencilbuffer + validated here is implicitly also a depthbuffer. +* ``SAMPLER``: A texture that may be sampled from in a fragment or vertex + shader. +* ``DYNAMIC``: A texture that will be mapped frequently. 
+ Methods ------- @@ -33,6 +55,14 @@ is_format_supported See if a format can be used in a specific manner. +**usage** is a bitmask of :ref:`PIPE_TEXTURE_USAGE` flags. + +Returns TRUE if all usages can be satisfied. + +.. note:: + + ``PIPE_TEXTURE_USAGE_DYNAMIC`` is not a valid usage. + texture_create ^^^^^^^^^^^^^^ diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index ebee4902b05..65a669d8cfa 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -516,8 +516,11 @@ SEQ - Set On Equal .. math:: dst.x = (src0.x == src1.x) ? 1 : 0 + dst.y = (src0.y == src1.y) ? 1 : 0 + dst.z = (src0.z == src1.z) ? 1 : 0 + dst.w = (src0.w == src1.w) ? 1 : 0 @@ -526,8 +529,11 @@ SFL - Set On False .. math:: dst.x = 0 + dst.y = 0 + dst.z = 0 + dst.w = 0 Considered for removal. @@ -537,8 +543,11 @@ SGT - Set On Greater Than .. math:: dst.x = (src0.x > src1.x) ? 1 : 0 + dst.y = (src0.y > src1.y) ? 1 : 0 + dst.z = (src0.z > src1.z) ? 1 : 0 + dst.w = (src0.w > src1.w) ? 1 : 0 @@ -560,8 +569,11 @@ SLE - Set On Less Equal Than .. math:: dst.x = (src0.x <= src1.x) ? 1 : 0 + dst.y = (src0.y <= src1.y) ? 1 : 0 + dst.z = (src0.z <= src1.z) ? 1 : 0 + dst.w = (src0.w <= src1.w) ? 1 : 0 @@ -570,8 +582,11 @@ SNE - Set On Not Equal .. math:: dst.x = (src0.x != src1.x) ? 1 : 0 + dst.y = (src0.y != src1.y) ? 1 : 0 + dst.z = (src0.z != src1.z) ? 1 : 0 + dst.w = (src0.w != src1.w) ? 1 : 0 @@ -580,8 +595,11 @@ STR - Set On True .. math:: dst.x = 1 + dst.y = 1 + dst.z = 1 + dst.w = 1 @@ -629,8 +647,11 @@ X2D - 2D Coordinate Transformation .. math:: dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w + dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w Considered for removal. @@ -979,13 +1000,13 @@ XOR - Bitwise Xor .. 
math:: - dst.x = src0.x ^ src1.x + dst.x = src0.x \oplus src1.x - dst.y = src0.y ^ src1.y + dst.y = src0.y \oplus src1.y - dst.z = src0.z ^ src1.z + dst.z = src0.z \oplus src1.z - dst.w = src0.w ^ src1.w + dst.w = src0.w \oplus src1.w SAD - Sum Of Absolute Differences diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d5f5c7bbba8..aa29dcb3947 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -358,6 +358,7 @@ struct cell_spu_function_info /** This is the object passed to spe_create_thread() */ +PIPE_ALIGN_TYPE(16, struct cell_init_info { unsigned id; @@ -370,7 +371,7 @@ struct cell_init_info uint *buffer_status; /**< points at cell_context->buffer_status */ struct cell_spu_function_info *spu_functions; -} ALIGN16_ATTRIB; +}); #endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 5c3188e7f9d..e402ed29220 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -89,7 +89,7 @@ struct cell_buffer_node; */ struct cell_buffer_list { - struct cell_fence fence ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_fence fence; struct cell_buffer_node *head; }; @@ -115,7 +115,7 @@ struct cell_context struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[2]; + struct pipe_buffer *constants[2]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; @@ -150,18 +150,18 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; - struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; - ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] 
ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; int cur_batch; /**< which buffer is being filled w/ commands */ /** [4] to ensure 16-byte alignment for each status word */ - uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; /** Associated with each command/batch buffer is a list of pipe_buffers diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 3fa8b975d39..0a4da8ecc85 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -51,17 +51,17 @@ cell_map_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) { - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + if (sp->constants[i] && sp->constants[i]->size) { + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i], PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, sp->mapped_constants[i], - sp->constants[i].buffer->size); + sp->constants[i]->size); } } draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], - sp->constants[PIPE_SHADER_VERTEX].buffer->size); + sp->constants[PIPE_SHADER_VERTEX]->size); } static void @@ -70,8 +70,8 @@ cell_unmap_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->buffer_unmap(ws, sp->constants[i].buffer); + if (sp->constants[i] && sp->constants[i]->size) + ws->buffer_unmap(ws, sp->constants[i]); sp->mapped_constants[i] = NULL; } } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 5b87286d4c5..f1e1dcb9eb0 100644 --- 
a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -240,12 +240,12 @@ cell_emit_state(struct cell_context *cell) if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { const uint shader = PIPE_SHADER_FRAGMENT; - const uint num_const = cell->constants[shader].buffer->size / sizeof(float); + const uint num_const = cell->constants[shader]->size / sizeof(float); uint i, j; float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); uint32_t *ibuf = (uint32_t *) buf; const float *constants = pipe_buffer_map(cell->pipe.screen, - cell->constants[shader].buffer, + cell->constants[shader], PIPE_BUFFER_USAGE_CPU_READ); ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; ibuf[4] = num_const; @@ -253,7 +253,7 @@ cell_emit_state(struct cell_context *cell) for (i = 0; i < num_const; i++) { buf[j++] = constants[i]; } - pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer); + pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader]); } if (cell->dirty & (CELL_NEW_FRAMEBUFFER | diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 6568c784fec..1b09cf7f7d7 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -183,7 +183,7 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct cell_context *cell = cell_context(pipe); @@ -193,7 +193,7 @@ cell_set_constant_buffer(struct pipe_context *pipe, draw_flush(cell->draw); /* note: reference counting */ - pipe_buffer_reference(&cell->constants[shader].buffer, buf->buffer); + pipe_buffer_reference(&cell->constants[shader], buf); if (shader == PIPE_SHADER_VERTEX) cell->dirty |= CELL_NEW_VS_CONSTANTS; diff --git a/src/gallium/drivers/cell/spu/spu_command.c 
b/src/gallium/drivers/cell/spu/spu_command.c index 12b855a3db2..55bd85bde2b 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,8 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; @@ -543,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d86d8e09a51..d2166a49016 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d ALIGN16_ATTRIB; + } d; unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i ALIGN16_ATTRIB; + } i; unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 
86056799405..0ca92af248d 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ + PIPE_ALIGN_VAR(16) struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] - ALIGN16_ATTRIB; + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index ff3d609d258..98919c43ffc 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - struct cell_spu_function_info funcs ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 33767e7c51d..b18f4c22ef1 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, vector float *constants); +PIPE_ALIGN_TYPE(16, struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ @@ -107,10 +108,11 @@ struct spu_framebuffer uint zsize; /**< 0, 2 or 4 bytes per Z */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -} ALIGN16_ATTRIB; +}); /** per-texture level info */ +PIPE_ALIGN_TYPE(16, struct spu_texture_level { void *start; @@ -123,20 +125,22 @@ struct spu_texture_level vector signed int mask_s, mask_t, mask_r; /** texcoord clamp limits */ vector signed int max_s, max_t, max_r; -} ALIGN16_ATTRIB; +}); +PIPE_ALIGN_TYPE(16, struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; uint target; /**< PIPE_TEXTURE_x */ -} ALIGN16_ATTRIB; +}); 
/** * All SPU global/context state will be in a singleton object of this type: */ +PIPE_ALIGN_TYPE(16, struct spu_global { /** One-time init/constant info */ @@ -155,8 +159,8 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) tile_t ctile; + PIPE_ALIGN_VAR(16) tile_t ztile; /** Read depth/stencil tiles? */ boolean read_depth_stencil; @@ -165,8 +169,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; + PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -175,7 +179,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; /** Current fragment ops function */ spu_fragment_program_func fragment_program; @@ -187,7 +191,7 @@ struct spu_global /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; -} ALIGN16_ATTRIB; +}); extern struct spu_global spu; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5ffb7073abf..14987e3c3a2 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] 
ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 03375d84a57..087963960df 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) static const qword +fetch_shuffle_data[5] = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword in[2 * 4]; /* Fetch four attributes for four vertices. 
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index fbe5b34d397..3e9804bf8ee 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; + PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw, ASSERT_ALIGN16(draw->constants); machine->Consts = (float (*)[4]) draw->constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = inputs; + machine->Outputs = outputs; spu_vertex_fetch( draw, machine, elts, count ); @@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; + PIPE_ALIGN_VAR(16) unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + + MAX_VERTEX_SIZE]; struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) unsigned char +immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); void diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 149393712a3..191a44c3dfc 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ 
b/src/gallium/drivers/failover/fo_context.h @@ -125,7 +125,7 @@ failover_context( struct pipe_context *pipe ) void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf); + struct pipe_buffer *buf); #endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 3f5f5560323..d6ec4d13136 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -495,7 +495,7 @@ failover_set_vertex_elements(struct pipe_context *pipe, void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct failover_context *failover = failover_context(pipe); diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 234b441ce6e..37cbd56036b 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -233,7 +233,8 @@ struct i915_context struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + /* XXX unneded */ + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 5f5b6f8e185..0fab6e1bc36 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -518,7 +518,7 @@ static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) static void i915_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct i915_context *i915 = i915_context(pipe); struct pipe_screen *screen = pipe->screen; @@ -538,13 +538,13 @@ static void 
i915_set_constant_buffer(struct pipe_context *pipe, */ if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(screen, buf->buffer, + if (buf->size && + (mapped = pipe_buffer_map(screen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(i915->current.constants[shader], mapped, buf->buffer->size); - pipe_buffer_unmap(screen, buf->buffer); + memcpy(i915->current.constants[shader], mapped, buf->size); + pipe_buffer_unmap(screen, buf); i915->current.num_user_constants[shader] - = buf->buffer->size / (4 * sizeof(float)); + = buf->size / (4 * sizeof(float)); } else { i915->current.num_user_constants[shader] = 0; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index bb32d90e331..e389587f3e1 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -262,7 +262,7 @@ static void brw_delete_vs_state( struct pipe_context *pipe, void *prog ) static void brw_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct brw_context *brw = brw_context(pipe); @@ -270,13 +270,13 @@ static void brw_set_constant_buffer(struct pipe_context *pipe, if (shader == PIPE_SHADER_FRAGMENT) { pipe_buffer_reference( &brw->curr.fragment_constants, - buf->buffer ); + buf ); brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS; } else { pipe_buffer_reference( &brw->curr.vertex_constants, - buf->buffer ); + buf ); brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 9f5b4e63236..f9063d90fb1 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -404,17 +404,17 @@ static void identity_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_constant_buffer *_buffer) + struct pipe_buffer 
*_buffer) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - struct pipe_constant_buffer unwrapped_buffer; - struct pipe_constant_buffer *buffer = NULL; + struct pipe_buffer *unwrapped_buffer; + struct pipe_buffer *buffer = NULL; - /* unwrap the input state */ + /* XXX hmm? unwrap the input state */ if (_buffer) { - unwrapped_buffer.buffer = identity_buffer_unwrap(_buffer->buffer); - buffer = &unwrapped_buffer; + unwrapped_buffer = identity_buffer_unwrap(_buffer); + buffer = unwrapped_buffer; } pipe->set_constant_buffer(pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 1cc3c9227cc..aaa675aec77 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -124,8 +124,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) } for (i = 0; i < Elements(llvmpipe->constants); i++) { - if (llvmpipe->constants[i].buffer) { - pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL); + if (llvmpipe->constants[i]) { + pipe_buffer_reference(&llvmpipe->constants[i], NULL); } } diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 6411797cf5d..426d6eb4a12 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -64,7 +64,7 @@ struct llvmpipe_context { /** Other rendering state */ struct pipe_blend_color blend_color[4][16]; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h index 7eb05de77a1..c3a48700a4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ b/src/gallium/drivers/llvmpipe/lp_quad.h @@ -31,6 +31,7 @@ 
#ifndef LP_QUAD_H #define LP_QUAD_H +#include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "tgsi/tgsi_exec.h" @@ -83,7 +84,7 @@ struct quad_header_inout struct quad_header_output { /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + PIPE_ALIGN_VAR(16) float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; }; @@ -92,9 +93,9 @@ struct quad_header_output */ struct quad_interp_coef { - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b18f17c0cd3..0b2d3a28014 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -117,7 +117,7 @@ struct setup_context { /** * Execute fragment shader for the four fragments in the quad. 
*/ -ALIGN_STACK +PIPE_ALIGN_STACK static void shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quads[], @@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, uint8_t *tile; uint8_t *color; void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 7020da145f3..e16793186be 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -154,7 +154,7 @@ void llvmpipe_set_clip_state( struct pipe_context *, void llvmpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf); + struct pipe_buffer *buf); void *llvmpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index b73ca2d41ed..9f4bbef73fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -714,12 +714,11 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *constants) + struct pipe_buffer *constants) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct pipe_buffer *buffer = constants ? constants->buffer : NULL; - unsigned size = buffer ? buffer->size : 0; - const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; + unsigned size = constants ? constants->size : 0; + const void *data = constants ? 
llvmpipe_buffer(constants)->data : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); @@ -727,7 +726,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, draw_flush(llvmpipe->draw); /* note: reference counting */ - pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); + pipe_buffer_reference(&llvmpipe->constants[shader], constants); if(shader == PIPE_SHADER_FRAGMENT) { llvmpipe->jit_context.constants = data; diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 29fff91981a..6c29e8d8ace 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,7 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -531,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -596,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t 
ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index faddfb96779..c1abee424c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,7 +142,7 @@ add_conv_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -230,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 23ea9ebbe7d..2b258f1052e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -199,7 +199,7 @@ add_store_rgba_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index e3dc4c5bf44..b67f1e16b1c 100644 --- 
a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -332,7 +332,7 @@ nv04_set_clip_state(struct pipe_context *pipe, static void nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv04_context *nv04 = nv04_context(pipe); struct pipe_screen *pscreen = pipe->screen; @@ -342,13 +342,13 @@ nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + if (buf && buf->size && + (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(nv04->constbuf[shader], mapped, buf->buffer->size); + memcpy(nv04->constbuf[shader], mapped, buf->size); nv04->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); + buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pscreen, buf); } } } diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index ffc6be3c401..ad7def53b12 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -458,7 +458,7 @@ nv10_set_clip_state(struct pipe_context *pipe, static void nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv10_context *nv10 = nv10_context(pipe); struct pipe_screen *pscreen = pipe->screen; @@ -468,13 +468,13 @@ nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + if (buf->size && + (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(nv10->constbuf[shader], mapped, buf->buffer->size); + 
memcpy(nv10->constbuf[shader], mapped, buf->size); nv10->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); + buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pscreen, buf); } } } diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c index 3a82e63423d..45697a60efd 100644 --- a/src/gallium/drivers/nv20/nv20_state.c +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -451,7 +451,7 @@ nv20_set_clip_state(struct pipe_context *pipe, static void nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv20_context *nv20 = nv20_context(pipe); struct pipe_screen *pscreen = pipe->screen; @@ -461,13 +461,13 @@ nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + if (buf->size && + (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(nv20->constbuf[shader], mapped, buf->buffer->size); + memcpy(nv20->constbuf[shader], mapped, buf->size); nv20->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); + buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pscreen, buf); } } } diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 98935678911..0cc3172dcd5 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -43,7 +43,6 @@ static struct nv30_texture_format * nv30_fragtex_format(uint pipe_format) { struct nv30_texture_format *tf = nv30_texture_formats; - char fs[128]; while (tf->defined) { if (tf->pipe == pipe_format) @@ -65,7 +64,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer); struct 
nv30_texture_format *tf; struct nouveau_stateobj *so; - uint32_t txf, txs , txp; + uint32_t txf, txs; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; tf = nv30_fragtex_format(pt->format); @@ -97,13 +96,6 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) return NULL; } - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - txp = 0; - } else { - txp = nv30mt->level[0].pitch; - txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */; - } - txs = tf->swizzle; so = so_new(1, 8, 2); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index a80dfb04880..065c927a10d 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -590,12 +590,12 @@ nv30_set_clip_state(struct pipe_context *pipe, static void nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv30_context *nv30 = nv30_context(pipe); - nv30->constbuf[shader] = buf->buffer; - nv30->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + nv30->constbuf[shader] = buf; + nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); if (shader == PIPE_SHADER_VERTEX) { nv30->dirty |= NV30_NEW_VERTPROG; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index ed0ca9e02c3..7d990f7d567 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -605,12 +605,12 @@ nv40_set_clip_state(struct pipe_context *pipe, static void nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv40_context *nv40 = nv40_context(pipe); - nv40->constbuf[shader] = buf->buffer; - nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + nv40->constbuf[shader] = buf; + nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); if 
(shader == PIPE_SHADER_VERTEX) { nv40->dirty |= NV40_NEW_VERTPROG; diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 5997456e4c9..1e697463221 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -67,8 +67,12 @@ nv50_destroy(struct pipe_context *pipe) so_ref(NULL, &nv50->state.vertprog); if (nv50->state.fragprog) so_ref(NULL, &nv50->state.fragprog); - if (nv50->state.programs) - so_ref(NULL, &nv50->state.programs); + if (nv50->state.geomprog) + so_ref(NULL, &nv50->state.geomprog); + if (nv50->state.fp_linkage) + so_ref(NULL, &nv50->state.fp_linkage); + if (nv50->state.gp_linkage) + so_ref(NULL, &nv50->state.gp_linkage); if (nv50->state.vtxfmt) so_ref(NULL, &nv50->state.vtxfmt); if (nv50->state.vtxbuf) @@ -100,7 +104,9 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) nv50->pipe.destroy = nv50_destroy; nv50->pipe.draw_arrays = nv50_draw_arrays; + nv50->pipe.draw_arrays_instanced = nv50_draw_arrays_instanced; nv50->pipe.draw_elements = nv50_draw_elements; + nv50->pipe.draw_elements_instanced = nv50_draw_elements_instanced; nv50->pipe.clear = nv50_clear; nv50->pipe.flush = nv50_flush; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index cbd4c3ff86d..bebcd95054f 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -29,9 +29,7 @@ #define NV50_CB_PVP 1 #define NV50_CB_PFP 2 #define NV50_CB_PGP 3 -#define NV50_CB_TIC 4 -#define NV50_CB_TSC 5 -#define NV50_CB_PUPLOAD 6 +#define NV50_CB_AUX 4 #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_ZSA (1 << 1) @@ -45,9 +43,11 @@ #define NV50_NEW_VERTPROG_CB (1 << 9) #define NV50_NEW_FRAGPROG (1 << 10) #define NV50_NEW_FRAGPROG_CB (1 << 11) -#define NV50_NEW_ARRAYS (1 << 12) -#define NV50_NEW_SAMPLER (1 << 13) -#define NV50_NEW_TEXTURE (1 << 14) +#define NV50_NEW_GEOMPROG (1 << 12) +#define NV50_NEW_GEOMPROG_CB (1 << 13) 
+#define NV50_NEW_ARRAYS (1 << 14) +#define NV50_NEW_SAMPLER (1 << 15) +#define NV50_NEW_TEXTURE (1 << 16) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -129,10 +129,13 @@ struct nv50_state { unsigned miptree_nr[PIPE_SHADER_TYPES]; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; - struct nouveau_stateobj *programs; + struct nouveau_stateobj *geomprog; + struct nouveau_stateobj *fp_linkage; + struct nouveau_stateobj *gp_linkage; struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; + struct nouveau_stateobj *instbuf; unsigned vtxelt_nr; }; @@ -157,6 +160,7 @@ struct nv50_context { struct pipe_framebuffer_state framebuffer; struct nv50_program *vertprog; struct nv50_program *fragprog; + struct nv50_program *geomprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; @@ -193,11 +197,22 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); +extern void nv50_draw_arrays_instanced(struct pipe_context *, unsigned mode, + unsigned start, unsigned count, + unsigned startInstance, + unsigned instanceCount); extern void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); +extern void nv50_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); extern void nv50_vbo_validate(struct nv50_context *nv50); /* nv50_clear.c */ @@ -207,7 +222,9 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, /* nv50_program.c */ extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct 
nv50_context *nv50); -extern void nv50_linkage_validate(struct nv50_context *nv50); +extern void nv50_geomprog_validate(struct nv50_context *nv50); +extern void nv50_fp_linkage_validate(struct nv50_context *nv50); +extern void nv50_gp_linkage_validate(struct nv50_context *nv50); extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3f1edf0a139..dc8364ced7e 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -92,12 +92,23 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) case PIPE_FORMAT_Z24S8_UNORM: tile_flags = 0x1800; break; + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x6c00; + break; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8Z24_UNORM: tile_flags = 0x2800; break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + tile_flags = 0x7400; + break; default: - tile_flags = 0x7000; + if ((pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) && + util_format_get_blocksizebits(pt->format) == 32) + tile_flags = 0x7a00; + else + tile_flags = 0x7000; break; } @@ -145,7 +156,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); FREE(mt); return NULL; @@ -188,7 +199,7 @@ nv50_miptree_destroy(struct pipe_texture *pt) struct nv50_miptree *mt = nv50_miptree(pt); unsigned l; - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); nouveau_bo_ref(NULL, &mt->base.bo); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index e16fa479e5f..20db51070ff 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c 
@@ -92,6 +92,11 @@ struct nv50_reg { int rhw; /* result hw for FP outputs, or interpolant index */ int acc; /* instruction where this reg is last read (first insn == 1) */ + + int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */ + int indirect[2]; /* index into pc->addr, or -1 */ + + ubyte buf_index; /* c{0 .. 15}[] or g{0 .. 15}[] */ }; #define NV50_MOD_NEG 1 @@ -135,7 +140,8 @@ struct nv50_pc { int immd_nr; struct nv50_reg **addr; int addr_nr; - uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ + struct nv50_reg *sysval; + int sysval_nr; struct nv50_reg *temp_temp[16]; struct nv50_program_exec *temp_temp_exec[16]; @@ -171,6 +177,8 @@ struct nv50_pc { uint8_t edgeflag_out; }; +static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *); + static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) { @@ -179,7 +187,10 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) reg->hw = hw; reg->mod = 0; reg->rhw = -1; + reg->vtx = -1; reg->acc = 0; + reg->indirect[0] = reg->indirect[1] = -1; + reg->buf_index = (type == P_CONST) ? 
1 : 0; } static INLINE unsigned @@ -197,7 +208,8 @@ terminate_mbb(struct nv50_pc *pc) /* remove records of temporary address register values */ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - pc->r_addr[i].rhw = -1; + if (pc->r_addr[i].index < 0) + pc->r_addr[i].acc = 0; } static void @@ -260,6 +272,7 @@ reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) if (reg) { alloc_reg(pc, reg); *ri = *reg; + reg->indirect[0] = reg->indirect[1] = -1; reg->mod = 0; } return ri; @@ -464,6 +477,12 @@ is_join(struct nv50_program_exec *e) return FALSE; } +static INLINE boolean +is_control_flow(struct nv50_program_exec *e) +{ + return (e->inst[0] & 2); +} + static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) @@ -525,11 +544,33 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) static INLINE void set_addr(struct nv50_program_exec *e, struct nv50_reg *a) { + assert(a->type == P_ADDR); + assert(!(e->inst[0] & 0x0c000000)); assert(!(e->inst[1] & 0x00000004)); e->inst[0] |= (a->hw & 3) << 26; - e->inst[1] |= (a->hw >> 2) << 2; + e->inst[1] |= a->hw & 4; +} + +static void +emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t); + +static void +emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int); + +static void +emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[1] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + set_addr(e, src); + + emit(pc, e); } static void @@ -548,72 +589,6 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, e); } -static struct nv50_reg * -alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) -{ - struct nv50_reg *a_tgsi = NULL, *a = NULL; - int i; - uint8_t avail = ~pc->addr_alloc; - - if (!ref) { - /* allocate for TGSI_FILE_ADDRESS */ - while (avail) { - i = ffs(avail) - 1; - - if (pc->r_addr[i].rhw < 0 || - pc->r_addr[i].acc 
!= pc->insn_cur) { - pc->addr_alloc |= (1 << i); - - pc->r_addr[i].rhw = -1; - pc->r_addr[i].index = i; - return &pc->r_addr[i]; - } - avail &= ~(1 << i); - } - assert(0); - return NULL; - } - - /* Allocate and set an address reg so we can access 'ref'. - * - * If and r_addr->index will be -1 or the hw index the value - * value in rhw is relative to. If rhw < 0, the reg has not - * been initialized or is in use for TGSI_FILE_ADDRESS. - */ - while (avail) { /* only consider regs that are not TGSI */ - i = ffs(avail) - 1; - avail &= ~(1 << i); - - if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) { - /* prefer an usused reg with low hw index */ - a = &pc->r_addr[i]; - continue; - } - if (!a && pc->r_addr[i].acc != pc->insn_cur) - a = &pc->r_addr[i]; - - if (ref->hw - pc->r_addr[i].rhw >= 128) - continue; - - if ((ref->acc >= 0 && pc->r_addr[i].index < 0) || - (ref->acc < 0 && pc->r_addr[i].index == ref->index)) { - pc->r_addr[i].acc = pc->insn_cur; - return &pc->r_addr[i]; - } - } - assert(a); - - if (ref->acc < 0) - a_tgsi = pc->addr[ref->index]; - - emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4); - - a->rhw = ref->hw & ~0x7f; - a->acc = pc->insn_cur; - a->index = a_tgsi ? ref->index : -1; - return a; -} - #define INTERP_LINEAR 0 #define INTERP_FLAT 1 #define INTERP_PERSPECTIVE 2 @@ -657,15 +632,15 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->param.shift = s; e->param.mask = m << (s % 32); - if (src->hw > 127) - set_addr(e, alloc_addr(pc, src)); + if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */ + set_addr(e, get_address_reg(pc, src)); else if (src->acc < 0) { assert(src->type == P_CONST); - set_addr(e, pc->addr[src->index]); + set_addr(e, pc->addr[src->indirect[0]]); } - e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); + e->inst[1] |= (src->buf_index << 22); } /* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! 
*/ @@ -694,6 +669,12 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + /* indirect (vertex base + c) load from p[] */ + e->inst[0] |= 0x01800000; + set_addr(e, get_address_reg(pc, src)); + } } alloc_reg(pc, src); @@ -808,6 +789,11 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + e->inst[0] |= 0x01800000; /* src from p[] */ + set_addr(e, get_address_reg(pc, src)); + } } else if (src->type == P_CONST || src->type == P_IMMD) { struct nv50_reg *temp = temp_temp(pc, e); @@ -832,13 +818,13 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x00800000)); - if (e->inst[0] & 0x01000000) { + if (e->inst[0] & 0x01800000) { struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x00800000)); set_data(pc, src, 0x7f, 16, e); e->inst[0] |= 0x00800000; } @@ -862,13 +848,13 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x01000000)); - if (e->inst[0] & 0x00800000) { + if (e->inst[0] & 0x01800000) { struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x01000000)); set_data(pc, src, 0x7f, 32+14, e); e->inst[0] |= 0x01000000; } @@ -997,11 +983,125 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, e->inst[0] |= dst->hw << 2; e->inst[0] |= s << 16; /* shift left */ - set_src_0_restricted(pc, src, e); + set_src_0(pc, src, e); emit(pc, e); } +static boolean +address_reg_suitable(struct nv50_reg *a, struct nv50_reg *r) +{ + if (!r) + return 
FALSE; + + if (r->vtx != a->vtx) + return FALSE; + if (r->vtx >= 0) + return (r->indirect[1] == a->indirect[1]); + + if (r->hw < a->rhw || (r->hw - a->rhw) >= 128) + return FALSE; + + if (a->index >= 0) + return (a->index == r->indirect[0]); + return (a->indirect[0] == r->indirect[0]); +} + +static void +load_vertex_base(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *a, int shift) +{ + struct nv50_reg mem, *temp; + + ctor_reg(&mem, P_ATTR, -1, dst->vtx); + + assert(dst->type == P_ADDR); + if (!a) { + emit_arl(pc, dst, &mem, 0); + return; + } + temp = alloc_temp(pc, NULL); + + if (shift) { + emit_mov_from_addr(pc, temp, a); + if (shift < 0) + emit_shl_imm(pc, temp, temp, shift); + emit_arl(pc, dst, temp, MAX2(shift, 0)); + } + emit_mov(pc, temp, &mem); + set_addr(pc->p->exec_tail, dst); + + emit_arl(pc, dst, temp, 0); + free_temp(pc, temp); +} + +/* case (ref == NULL): allocate address register for TGSI_FILE_ADDRESS + * case (vtx >= 0, acc >= 0): load vertex base from a[vtx * 4] to $aX + * case (vtx >= 0, acc < 0): load vertex base from s[$aY + vtx * 4] to $aX + * case (vtx < 0, acc >= 0): memory address too high to encode + * case (vtx < 0, acc < 0): get source register for TGSI_FILE_ADDRESS + */ +static struct nv50_reg * +get_address_reg(struct nv50_pc *pc, struct nv50_reg *ref) +{ + int i; + struct nv50_reg *a_ref, *a = NULL; + + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { + if (pc->r_addr[i].acc == 0) + a = &pc->r_addr[i]; /* an unused address reg */ + else + if (address_reg_suitable(&pc->r_addr[i], ref)) { + pc->r_addr[i].acc = pc->insn_cur; + return &pc->r_addr[i]; + } else + if (!a && pc->r_addr[i].index < 0 && + pc->r_addr[i].acc < pc->insn_cur) + a = &pc->r_addr[i]; + } + if (!a) { + /* We'll be able to spill address regs when this + * mess is replaced with a proper compiler ... 
+ */ + NOUVEAU_ERR("out of address regs\n"); + abort(); + return NULL; + } + + /* initialize and reserve for this TGSI instruction */ + a->rhw = 0; + a->index = a->indirect[0] = a->indirect[1] = -1; + a->acc = pc->insn_cur; + + if (!ref) { + a->vtx = -1; + return a; + } + a->vtx = ref->vtx; + + /* now put in the correct value ... */ + + if (ref->vtx >= 0) { + a->indirect[1] = ref->indirect[1]; + + /* For an indirect vertex index, we need to shift address right + * by 2, the address register will contain vtx * 16, we need to + * load from a[vtx * 4]. + */ + load_vertex_base(pc, a, (ref->acc < 0) ? + pc->addr[ref->indirect[1]] : NULL, -2); + } else { + assert(ref->acc < 0 || ref->indirect[0] < 0); + + a->rhw = ref->hw & ~0x7f; + a->indirect[0] = ref->indirect[0]; + a_ref = (ref->acc < 0) ? pc->addr[ref->indirect[0]] : NULL; + + emit_add_addr_imm(pc, a, a_ref, a->rhw * 4); + } + return a; +} + #define NV50_MAX_F32 0x880 #define NV50_MAX_S32 0x08c #define NV50_MAX_U32 0x084 @@ -1629,6 +1729,18 @@ emit_ret(struct nv50_pc *pc, int pred, unsigned cc) emit_control_flow(pc, 0x3, pred, cc); } +static void +emit_prim_cmd(struct nv50_pc *pc, unsigned cmd) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xf0000000 | (cmd << 9); + e->inst[1] = 0xc0000000; + set_long(pc, e); + + emit(pc, e); +} + #define QOP_ADD 0 #define QOP_SUBR 1 #define QOP_SUB 2 @@ -2171,14 +2283,19 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c]; if (!r) { - r = alloc_addr(pc, NULL); - pc->addr[dst->Register.Index * 4 + c] = r; + r = get_address_reg(pc, NULL); + r->index = dst->Register.Index * 4 + c; + pc->addr[r->index] = r; } assert(r); return r; } case TGSI_FILE_NULL: return NULL; + case TGSI_FILE_SYSTEM_VALUE: + assert(pc->sysval[dst->Register.Index].type == P_RESULT); + assert(c == 0); + return &pc->sysval[dst->Register.Index]; default: break; } @@ -2208,6 +2325,18 @@ tgsi_src(struct nv50_pc 
*pc, int chan, const struct tgsi_full_src_register *src, switch (src->Register.File) { case TGSI_FILE_INPUT: r = &pc->attr[src->Register.Index * 4 + c]; + + if (!src->Dimension.Dimension) + break; + r = reg_instance(pc, r); + r->vtx = src->Dimension.Index; + + if (!src->Dimension.Indirect) + break; + swz = tgsi_util_get_src_register_swizzle( + &src->DimIndirect, 0); + r->acc = -1; + r->indirect[1] = src->DimIndirect.Index * 4 + swz; break; case TGSI_FILE_TEMPORARY: r = &pc->temp[src->Register.Index * 4 + c]; @@ -2221,12 +2350,12 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, * use the index field to select the address reg. */ r = reg_instance(pc, NULL); + ctor_reg(r, P_CONST, -1, src->Register.Index * 4 + c); + swz = tgsi_util_get_src_register_swizzle( - &src->Indirect, 0); - ctor_reg(r, P_CONST, - src->Indirect.Index * 4 + swz, - src->Register.Index * 4 + c); + &src->Indirect, 0); r->acc = -1; + r->indirect[0] = src->Indirect.Index * 4 + swz; break; case TGSI_FILE_IMMEDIATE: r = &pc->immd[src->Register.Index * 4 + c]; @@ -2237,6 +2366,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = pc->addr[src->Register.Index * 4 + c]; assert(r); break; + case TGSI_FILE_SYSTEM_VALUE: + assert(c == 0); + r = &pc->sysval[src->Register.Index]; + break; default: assert(0); break; @@ -2273,7 +2406,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r->mod |= mod & NV50_MOD_I32; assert(r); - if (r->acc >= 0 && r != temp) + if (r->acc >= 0 && r->vtx < 0 && r != temp) return reg_instance(pc, r); /* will clear r->mod */ return r; } @@ -2495,10 +2628,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_ARL: - assert(src[0][0]); temp = temp_temp(pc, NULL); - emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); - emit_arl(pc, dst[0], temp, 4); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, temp, src[0][c], -1, + 
CVT_FLOOR | CVT_S32_F32); + emit_arl(pc, dst[c], temp, 4); + } break; case TGSI_OPCODE_BGNLOOP: pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc); @@ -2605,6 +2742,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; terminate_mbb(pc); break; + case TGSI_OPCODE_EMIT: + emit_prim_cmd(pc, 1); + break; case TGSI_OPCODE_ENDIF: pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; @@ -2628,8 +2768,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDPRIM: + emit_prim_cmd(pc, 2); + break; case TGSI_OPCODE_ENDSUB: assert(pc->in_subroutine); + terminate_mbb(pc); pc->in_subroutine = FALSE; break; case TGSI_OPCODE_EX2: @@ -3028,10 +3172,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!is_long(pc->p->exec_tail)) convert_to_long(pc, pc->p->exec_tail); else - if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail)) + if (is_immd(pc->p->exec_tail) || + is_join(pc->p->exec_tail) || + is_control_flow(pc->p->exec_tail)) emit_nop(pc); pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ + + terminate_mbb(pc); break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -3327,17 +3475,53 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) * value of 0 for back-facing, and 0xffffffff for front-facing. 
*/ static void -load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a) +load_frontfacing(struct nv50_pc *pc, struct nv50_reg *sv) +{ + struct nv50_reg *temp = alloc_temp(pc, NULL); + int r_pred = 0; + + temp->rhw = 255; + emit_interp(pc, temp, NULL, INTERP_FLAT); + + emit_cvt(pc, sv, temp, r_pred, CVT_ABS | CVT_F32_S32); + + emit_not(pc, temp, temp); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + emit_cvt(pc, sv, temp, -1, CVT_F32_S32); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + + free_temp(pc, temp); +} + +static void +load_instance_id(struct nv50_pc *pc, unsigned index) { - struct nv50_reg *one = alloc_immd(pc, 1.0f); + struct nv50_reg reg, mem; - assert(a->rhw == -1); - alloc_reg(pc, a); /* do this before rhw is set */ - a->rhw = 255; - load_interpolant(pc, a); - emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND); + ctor_reg(&reg, P_TEMP, -1, -1); + ctor_reg(&mem, P_CONST, -1, 24); /* startInstance */ + mem.buf_index = 2; - FREE(one); + emit_add_b32(pc, &reg, &pc->sysval[index], &mem); + pc->sysval[index] = reg; +} + +static void +copy_semantic_info(struct nv50_program *p) +{ + unsigned i, id; + + for (i = 0; i < p->cfg.in_nr; ++i) { + id = p->cfg.in[i].id; + p->cfg.in[i].sn = p->info.input_semantic_name[id]; + p->cfg.in[i].si = p->info.input_semantic_index[id]; + } + + for (i = 0; i < p->cfg.out_nr; ++i) { + id = p->cfg.out[i].id; + p->cfg.out[i].sn = p->info.output_semantic_name[id]; + p->cfg.out[i].si = p->info.output_semantic_index[id]; + } } static boolean @@ -3346,7 +3530,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct tgsi_parse_context tp; struct nv50_program *p = pc->p; boolean ret = FALSE; - unsigned i, c, flat_nr = 0; + unsigned i, c, instance_id, vertex_id, flat_nr = 0; tgsi_parse_init(&tp, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&tp)) { @@ -3386,13 +3570,13 @@ nv50_program_tx_prep(struct nv50_pc *pc) switch (d->Semantic.Name) { case TGSI_SEMANTIC_BCOLOR: p->cfg.two_side[si].hw = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = 
first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_PSIZE: p->cfg.psiz = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_EDGEFLAG: pc->edgeflag_out = first; @@ -3432,6 +3616,37 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->interp_mode[i] = mode; } break; + case TGSI_FILE_SYSTEM_VALUE: + assert(d->Declaration.Semantic); + switch (d->Semantic.Name) { + case TGSI_SEMANTIC_FACE: + assert(p->type == PIPE_SHADER_FRAGMENT); + load_frontfacing(pc, + &pc->sysval[first]); + break; + case TGSI_SEMANTIC_INSTANCEID: + assert(p->type == PIPE_SHADER_VERTEX); + instance_id = first; + p->cfg.regs[0] |= (1 << 4); + break; + case TGSI_SEMANTIC_PRIMID: + assert(p->type != PIPE_SHADER_VERTEX); + p->cfg.prim_id = first; + break; + /* + case TGSI_SEMANTIC_PRIMIDIN: + assert(p->type == PIPE_SHADER_GEOMETRY); + pc->sysval[first].hw = 6; + p->cfg.regs[0] |= (1 << 8); + break; + case TGSI_SEMANTIC_VERTEXID: + assert(p->type == PIPE_SHADER_VERTEX); + vertex_id = first; + p->cfg.regs[0] |= (1 << 12) | (1 << 0); + break; + */ + } + break; case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_SAMPLER: @@ -3452,68 +3667,98 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - if (p->type == PIPE_SHADER_VERTEX) { + if (p->type == PIPE_SHADER_VERTEX || p->type == PIPE_SHADER_GEOMETRY) { int rid = 0; - for (i = 0; i < pc->attr_nr * 4; ++i) { - if (pc->attr[i].acc) { - pc->attr[i].hw = rid++; - p->cfg.attr[i / 32] |= 1 << (i % 32); + if (p->type == PIPE_SHADER_GEOMETRY) { + for (i = 0; i < pc->attr_nr; ++i) { + p->cfg.in[i].hw = rid; + p->cfg.in[i].id = i; + + for (c = 0; c < 4; ++c) { + int n = i * 4 + c; + if (!pc->attr[n].acc) + continue; + pc->attr[n].hw = rid++; + p->cfg.in[i].mask |= 1 << c; + } + } + } else { + for (i = 0; i < pc->attr_nr * 4; ++i) { + if (pc->attr[i].acc) { + pc->attr[i].hw = rid++; + p->cfg.attr[i / 32] |= 1 << (i % 32); + } + } + if 
(p->cfg.regs[0] & (1 << 0)) + pc->sysval[vertex_id].hw = rid++; + if (p->cfg.regs[0] & (1 << 4)) { + pc->sysval[instance_id].hw = rid++; + load_instance_id(pc, instance_id); } } for (i = 0, rid = 0; i < pc->result_nr; ++i) { - p->cfg.io[i].hw = rid; - p->cfg.io[i].id = i; + p->cfg.out[i].hw = rid; + p->cfg.out[i].id = i; for (c = 0; c < 4; ++c) { int n = i * 4 + c; if (!pc->result[n].acc) continue; pc->result[n].hw = rid++; - p->cfg.io[i].mask |= 1 << c; + p->cfg.out[i].mask |= 1 << c; } } + if (p->cfg.prim_id < 0x40) { + /* GP has to write to PrimitiveID */ + ctor_reg(&pc->sysval[p->cfg.prim_id], + P_RESULT, p->cfg.prim_id, rid); + p->cfg.prim_id = rid++; + } for (c = 0; c < 2; ++c) if (p->cfg.two_side[c].hw < 0x40) - p->cfg.two_side[c] = p->cfg.io[ + p->cfg.two_side[c] = p->cfg.out[ p->cfg.two_side[c].hw]; if (p->cfg.psiz < 0x40) - p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; + p->cfg.psiz = p->cfg.out[p->cfg.psiz].hw; + + copy_semantic_info(p); } else if (p->type == PIPE_SHADER_FRAGMENT) { - int rid, aid; + int rid, aid, base; unsigned n = 0, m = pc->attr_nr - flat_nr; pc->allow32 = TRUE; - int base = (TGSI_SEMANTIC_POSITION == - p->info.input_semantic_name[0]) ? 0 : 1; + base = (TGSI_SEMANTIC_POSITION == + p->info.input_semantic_name[0]) ? 0 : 1; /* non-flat interpolants have to be mapped to * the lower hardware IDs, so sort them: */ for (i = 0; i < pc->attr_nr; i++) { if (pc->interp_mode[i] == INTERP_FLAT) - p->cfg.io[m++].id = i; + p->cfg.in[m++].id = i; else { if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) - p->cfg.io[n].linear = TRUE; - p->cfg.io[n++].id = i; + p->cfg.in[n].linear = TRUE; + p->cfg.in[n++].id = i; } } + copy_semantic_info(p); if (!base) /* set w-coordinate mask from perspective interp */ - p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; + p->cfg.in[0].mask |= p->cfg.regs[1] >> 24; aid = popcnt4( /* if fcrd isn't contained in cfg.io */ - base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); + base ? 
(p->cfg.regs[1] >> 24) : p->cfg.in[0].mask); for (n = 0; n < pc->attr_nr; ++n) { - p->cfg.io[n].hw = rid = aid; - i = p->cfg.io[n].id; + p->cfg.in[n].hw = rid = aid; + i = p->cfg.in[n].id; if (p->info.input_semantic_name[n] == TGSI_SEMANTIC_FACE) { @@ -3525,15 +3770,15 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (!pc->attr[i * 4 + c].acc) continue; pc->attr[i * 4 + c].rhw = rid++; - p->cfg.io[n].mask |= 1 << c; + p->cfg.in[n].mask |= 1 << c; load_interpolant(pc, &pc->attr[i * 4 + c]); } - aid += popcnt4(p->cfg.io[n].mask); + aid += popcnt4(p->cfg.in[n].mask); } if (!base) - p->cfg.regs[1] |= p->cfg.io[0].mask << 24; + p->cfg.regs[1] |= p->cfg.in[0].mask << 24; m = popcnt4(p->cfg.regs[1] >> 24); @@ -3543,32 +3788,33 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->cfg.regs[1] |= aid - m; if (flat_nr) { - i = p->cfg.io[pc->attr_nr - flat_nr].hw; + i = p->cfg.in[pc->attr_nr - flat_nr].hw; p->cfg.regs[1] |= (i - m) << 16; } else p->cfg.regs[1] |= p->cfg.regs[1] << 16; /* mark color semantic for light-twoside */ - n = 0x40; - for (i = 0; i < pc->attr_nr; i++) { - ubyte si, sn; - - sn = p->info.input_semantic_name[p->cfg.io[i].id]; - si = p->info.input_semantic_index[p->cfg.io[i].id]; - - if (sn == TGSI_SEMANTIC_COLOR) { - p->cfg.two_side[si] = p->cfg.io[i]; - - /* increase colour count */ - p->cfg.regs[0] += popcnt4( - p->cfg.two_side[si].mask) << 16; - - n = MIN2(n, p->cfg.io[i].hw - m); + n = 0x80; + for (i = 0; i < p->cfg.in_nr; i++) { + if (p->cfg.in[i].sn == TGSI_SEMANTIC_COLOR) { + n = MIN2(n, p->cfg.in[i].hw - m); + p->cfg.two_side[p->cfg.in[i].si] = p->cfg.in[i]; + + p->cfg.regs[0] += /* increase colour count */ + popcnt4(p->cfg.in[i].mask) << 16; } } - if (n < 0x40) + if (n < 0x80) p->cfg.regs[0] += n; + if (p->cfg.prim_id < 0x40) { + pc->sysval[p->cfg.prim_id].rhw = rid++; + emit_interp(pc, &pc->sysval[p->cfg.prim_id], NULL, + INTERP_FLAT); + /* increase FP_INTERPOLANT_CTRL_COUNT */ + p->cfg.regs[1] += 1; + } + /* Initialize FP results: * FragDepth is always 
first TGSI and last hw output */ @@ -3622,10 +3868,31 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc->attr); if (pc->temp) FREE(pc->temp); + if (pc->sysval) + FREE(pc->sysval); + if (pc->insn_pos) + FREE(pc->insn_pos); FREE(pc); } +static INLINE uint32_t +nv50_map_gs_output_prim(unsigned pprim) +{ + switch (pprim) { + case PIPE_PRIM_POINTS: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS; + case PIPE_PRIM_LINE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP; + default: + NOUVEAU_ERR("invalid GS_OUTPUT_PRIMITIVE: %u\n", pprim); + abort(); + return 0; + } +} + static boolean ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) { @@ -3639,25 +3906,55 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1; assert(pc->addr_nr <= 2); + pc->sysval_nr = p->info.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; p->cfg.high_temp = 4; p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.prim_id = 0x40; p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + for (i = 0; i < p->info.num_properties; ++i) { + unsigned *data = &p->info.properties[i].data[0]; + + switch (p->info.properties[i].name) { + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + p->cfg.prim_type = nv50_map_gs_output_prim(data[0]); + break; + case TGSI_PROPERTY_GS_MAX_VERTICES: + p->cfg.vert_count = data[0]; + break; + default: + break; + } + } + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; p->cfg.clpd = 0x40; - p->cfg.io_nr = pc->result_nr; + p->cfg.out_nr = pc->result_nr; + break; + case PIPE_SHADER_GEOMETRY: + assert(p->cfg.prim_type); + assert(p->cfg.vert_count); + + p->cfg.psiz = 0x80; + p->cfg.clpd = 0x80; + p->cfg.prim_id = 0x80; + p->cfg.out_nr = pc->result_nr; + p->cfg.in_nr = pc->attr_nr; + + p->cfg.two_side[0].hw = 0x80; + p->cfg.two_side[1].hw = 0x80; break; case 
PIPE_SHADER_FRAGMENT: rtype[0] = rtype[1] = P_TEMP; p->cfg.regs[0] = 0x01000004; - p->cfg.io_nr = pc->attr_nr; + p->cfg.in_nr = pc->attr_nr; if (p->info.writes_z) { p->cfg.regs[2] |= 0x00000100; @@ -3715,7 +4012,16 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) return FALSE; } for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1); + ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1); + + if (pc->sysval_nr) { + pc->sysval = CALLOC(pc->sysval_nr, sizeof(struct nv50_reg *)); + if (!pc->sysval) + return FALSE; + /* will only ever use SYSTEM_VALUE[i].x (hopefully) */ + for (i = 0; i < pc->sysval_nr; ++i) + ctor_reg(&pc->sysval[i], rtype[0], i, -1); + } return TRUE; } @@ -3877,13 +4183,17 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) if (p->param_nr) { unsigned cb; - uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], + uint32_t *map = pipe_buffer_map(pscreen, + nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - - if (p->type == PIPE_SHADER_VERTEX) + switch (p->type) { + case PIPE_SHADER_GEOMETRY: cb = NV50_CB_PGP; break; + case PIPE_SHADER_FRAGMENT: cb = NV50_CB_PFP; break; + default: cb = NV50_CB_PVP; - else - cb = NV50_CB_PFP; + assert(p->type == PIPE_SHADER_VERTEX); + break; + } nv50_program_upload_data(nv50, map, 0, p->param_nr, cb); pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]); @@ -3977,19 +4287,18 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(5, 8, 2); + so = so_new(5, 7, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); + NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); + NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); so_data (so, p->cfg.attr[0]); so_data (so, p->cfg.attr[1]); so_method(so, tesla, 
NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); - so_data (so, p->cfg.high_result); //8); + so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1); so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_VP_START_ID, 1); so_data (so, 0); /* program start offset */ @@ -4033,42 +4342,74 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } +void +nv50_geomprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->geomprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(6, 7, 2); + so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1); + so_data (so, p->cfg.prim_type); + so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1); + so_data (so, p->cfg.vert_count); + so_method(so, tesla, NV50TCL_GP_START_ID, 1); + so_data (so, 0); + so_ref(so, &nv50->state.geomprog); + so_ref(NULL, &so); +} + static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { + struct nv50_program *vp; struct nv50_program *fp = nv50->fragprog; - struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; uint32_t origin = 0x00000010; + vp = nv50->geomprog ? 
nv50->geomprog : nv50->vertprog; + /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in * the VP is PointCoord. */ memset(pntc, 0, 8 * sizeof(uint32_t)); - for (i = 0; i < fp->cfg.io_nr; i++) { - uint8_t sn, si; - uint8_t j, k = fp->cfg.io[i].id; - unsigned n = popcnt4(fp->cfg.io[i].mask); + for (i = 0; i < fp->cfg.in_nr; i++) { + unsigned j, n = popcnt4(fp->cfg.in[i].mask); - if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { + if (fp->cfg.in[i].sn != TGSI_SEMANTIC_GENERIC) { m += n; continue; } - for (j = 0; j < vp->info.num_outputs; ++j) { - sn = vp->info.output_semantic_name[j]; - si = vp->info.output_semantic_index[j]; - - if (sn == fp->info.input_semantic_name[k] && - si == fp->info.input_semantic_index[k]) + for (j = 0; j < vp->cfg.out_nr; ++j) + if (vp->cfg.out[j].sn == fp->cfg.in[i].sn && + vp->cfg.out[j].si == fp->cfg.in[i].si) break; - } - if (j < vp->info.num_outputs) { - ubyte mode = - nv50->rasterizer->pipe.sprite_coord_mode[si]; + if (j < vp->cfg.out_nr) { + ubyte mode = nv50->rasterizer->pipe.sprite_coord_mode[ + vp->cfg.out[j].si]; if (mode == PIPE_SPRITE_COORD_NONE) { m += n; @@ -4080,7 +4421,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) /* this is either PointCoord or replaced by sprite coords */ for (c = 0; c < 4; c++) { - if (!(fp->cfg.io[i].mask & (1 << c))) + if (!(fp->cfg.in[i].mask & (1 << c))) continue; pntc[m / 8] |= (c + 1) << ((m % 8) * 4); ++m; @@ -4090,18 +4431,22 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) } static int -nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], - struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) +nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4], + struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) { int c; uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; - uint8_t *map = (uint8_t *)p_map; + uint8_t *map = (uint8_t *)map32; 
for (c = 0; c < 4; ++c) { if (mf & 1) { if (fpi->linear == TRUE) lin[mid / 32] |= 1 << (mid % 32); - map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); + if (mv & 1) + map[mid] = oid; + else + map[mid] = (c == 3) ? (zval + 1) : zval; + ++mid; } oid += mv & 1; @@ -4113,34 +4458,42 @@ nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], } void -nv50_linkage_validate(struct nv50_context *nv50) +nv50_fp_linkage_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *vp = nv50->vertprog; struct nv50_program *fp = nv50->fragprog; struct nouveau_stateobj *so; - struct nv50_sreg4 dummy, *vpo; + struct nv50_sreg4 dummy; int i, n, c, m = 0; - uint32_t map[16], lin[4], reg[5], pcrd[8]; + uint32_t map[16], lin[4], reg[6], pcrd[8]; + uint8_t zval = 0x40; + if (nv50->geomprog) { + vp = nv50->geomprog; + zval = 0x80; + } memset(map, 0, sizeof(map)); memset(lin, 0, sizeof(lin)); reg[1] = 0x00000004; /* low and high clip distance map ids */ reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ reg[3] = 0x00000000; /* point size map id & enable */ + reg[5] = 0x00000000; /* primitive ID map slot */ reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ reg[4] = fp->cfg.regs[1]; /* interpolant info */ dummy.linear = FALSE; dummy.mask = 0xf; /* map all components of HPOS */ - m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); + m = nv50_vec4_map(map, m, zval, lin, &dummy, &vp->cfg.out[0]); dummy.mask = 0x0; if (vp->cfg.clpd < 0x40) { - for (c = 0; c < vp->cfg.clpd_nr; ++c) - map[m++] = vp->cfg.clpd + c; + for (c = 0; c < vp->cfg.clpd_nr; ++c) { + map[m / 4] |= (vp->cfg.clpd + c) << ((m % 4) * 8); + ++m; + } reg[1] = (m << 8); } @@ -4148,35 +4501,37 @@ nv50_linkage_validate(struct nv50_context *nv50) /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ if (nv50->rasterizer->pipe.light_twoside) { - vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 *vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 
*fpi = &fp->cfg.two_side[0]; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[0], &vpo[0]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[1], &vpo[1]); } reg[0] += m - 4; /* adjust FFC0 id */ reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ - for (i = 0; i < fp->cfg.io_nr; i++) { - ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id]; - ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id]; - - /* position must be mapped first */ - assert(i == 0 || sn != TGSI_SEMANTIC_POSITION); - + for (i = 0; i < fp->cfg.in_nr; i++) { /* maybe even remove these from cfg.io */ - if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE) + if (fp->cfg.in[i].sn == TGSI_SEMANTIC_POSITION || + fp->cfg.in[i].sn == TGSI_SEMANTIC_FACE) continue; - /* VP outputs and vp->cfg.io are in the same order */ - for (n = 0; n < vp->info.num_outputs; ++n) { - if (vp->info.output_semantic_name[n] == sn && - vp->info.output_semantic_index[n] == si) + for (n = 0; n < vp->cfg.out_nr; ++n) + if (vp->cfg.out[n].sn == fp->cfg.in[i].sn && + vp->cfg.out[n].si == fp->cfg.in[i].si) break; - } - vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); + m = nv50_vec4_map(map, m, zval, lin, &fp->cfg.in[i], + (n < vp->cfg.out_nr) ? 
+ &vp->cfg.out[n] : &dummy); + } + /* PrimitiveID either is replaced by the system value, or + * written by the geometry shader into an output register + */ + if (fp->cfg.prim_id < 0x40) { + map[m / 4] |= vp->cfg.prim_id << ((m % 4) * 8); + reg[5] = m++; } if (nv50->rasterizer->pipe.point_size_per_vertex) { @@ -4184,14 +4539,28 @@ nv50_linkage_validate(struct nv50_context *nv50) reg[3] = (m++ << 4) | 1; } - /* now fill the stateobj */ - so = so_new(7, 57, 0); + /* now fill the stateobj (at most 28 so_data) */ + so = so_new(10, 54, 0); n = (m + 3) / 4; - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); - so_data (so, m); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); - so_datap (so, map, n); + assert(m <= 32); + if (vp->type == PIPE_SHADER_GEOMETRY) { + so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n); + so_datap (so, map, n); + } else { + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0]); + + so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1); + so_data (so, reg[5]); + + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); + so_datap (so, map, n); + } so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); so_datap (so, reg, 4); @@ -4211,8 +4580,77 @@ nv50_linkage_validate(struct nv50_context *nv50) so_datap (so, pcrd, 8); } - so_ref(so, &nv50->state.programs); - so_ref(NULL, &so); + so_method(so, tesla, NV50TCL_GP_ENABLE, 1); + so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 
1 : 0); + + so_ref(so, &nv50->state.fp_linkage); + so_ref(NULL, &so); +} + +static int +construct_vp_gp_mapping(uint32_t *map32, int m, + struct nv50_program *vp, struct nv50_program *gp) +{ + uint8_t *map = (uint8_t *)map32; + int i, j, c; + + for (i = 0; i < gp->cfg.in_nr; ++i) { + uint8_t oid, mv = 0, mg = gp->cfg.in[i].mask; + + for (j = 0; j < vp->cfg.out_nr; ++j) { + if (vp->cfg.out[j].sn == gp->cfg.in[i].sn && + vp->cfg.out[j].si == gp->cfg.in[i].si) { + mv = vp->cfg.out[j].mask; + oid = vp->cfg.out[j].hw; + break; + } + } + + for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) { + if (mg & mv & 1) + map[m++] = oid; + else + if (mg & 1) + map[m++] = (c == 3) ? 0x41 : 0x40; + oid += mv & 1; + } + } + return m; +} + +void +nv50_gp_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *gp = nv50->geomprog; + uint32_t map[16]; + int m = 0; + + if (!gp) { + so_ref(NULL, &nv50->state.gp_linkage); + return; + } + memset(map, 0, sizeof(map)); + + m = construct_vp_gp_mapping(map, m, vp, gp); + + so = so_new(3, 24 - 3, 0); + + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0] | gp->cfg.regs[0]); + + assert(m <= 32); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + + m = (m + 3) / 4; + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m); + so_datap (so, map, m); + + so_ref(so, &nv50->state.gp_linkage); + so_ref(NULL, &so); } void @@ -4229,6 +4667,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) nouveau_bo_ref(NULL, &p->bo); + FREE(p->immd); nouveau_resource_free(&p->data[0]); p->translated = 0; diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 461fec1d89c..1e3ad6bff05 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -16,11 +16,13 @@ struct 
nv50_program_exec { }; struct nv50_sreg4 { - uint8_t hw; - uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */ + uint8_t hw; /* hw index, nv50 wants flat FP inputs last */ + uint8_t id; /* tgsi index */ uint8_t mask; boolean linear; + + ubyte sn, si; /* semantic name & index */ }; struct nv50_program { @@ -49,16 +51,24 @@ struct nv50_program { uint32_t regs[4]; /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */ - unsigned io_nr; - struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS]; + unsigned in_nr, out_nr; + struct nv50_sreg4 in[PIPE_MAX_SHADER_INPUTS]; + struct nv50_sreg4 out[PIPE_MAX_SHADER_OUTPUTS]; /* FP colour inputs, VP/GP back colour outputs */ struct nv50_sreg4 two_side[2]; - /* VP only */ + /* GP only */ + unsigned vert_count; + uint8_t prim_type; + + /* VP & GP only */ uint8_t clpd, clpd_nr; uint8_t psiz; uint8_t edgeflag_in; + + /* FP & GP only */ + uint8_t prim_id; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 28e2b35deaa..9d58f3c9651 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -167,7 +167,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) struct nv50_screen *screen = nv50_screen(pscreen); unsigned i; - for (i = 0; i < 2; i++) { + for (i = 0; i < 3; i++) { if (screen->constbuf_parm[i]) nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); } @@ -329,7 +329,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(40, 84, 20); + so = so_new(47, 95, 24); so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); so_data (so, NV50TCL_COND_MODE_ALWAYS); @@ -352,10 +352,11 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); - so_data (so, 0x54); - so_method(so, screen->tesla, 
NV50TCL_TEX_LIMITS(2), 1); - so_data (so, 0x54); + for (i = 0; i < 3; ++i) { + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(i), 1); + so_data (so, 0x54); + } + /* origin is top left (set to 1 for bottom left) */ so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); @@ -370,8 +371,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - for (i = 0; i < 2; i++) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4, + for (i = 0; i < 3; i++) { + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { nv50_screen_destroy(pscreen); @@ -406,22 +407,45 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000021 | (NV50_CB_PMISC << 12)); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000031 | (NV50_CB_PMISC << 12)); + /* bind auxiliary constbuf to immediate data bo */ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_AUX << 16) | 0x00000200); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000201 | (NV50_CB_AUX << 12)); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000221 | (NV50_CB_AUX << 12)); + + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); + so_reloc (so, 
screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PVP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000101 | (NV50_CB_PVP << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PGP << 16) | 0x00000800); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000121 | (NV50_CB_PGP << 12)); + + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PFP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index a038a4e3c2a..0d786b0f2e3 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -18,10 +18,12 @@ struct nv50_screen { struct nouveau_notifier *sync; struct nouveau_bo *constbuf_misc[1]; - struct nouveau_bo *constbuf_parm[2]; + struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES]; struct nouveau_resource *immd_heap[1]; - struct 
nouveau_resource *parm_heap[2]; + struct nouveau_resource *parm_heap[PIPE_SHADER_TYPES]; + + struct pipe_buffer *strm_vbuf[16]; struct nouveau_bo *tic; struct nouveau_bo *tsc; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 1f67df814b1..6ab33be663d 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -531,7 +531,7 @@ nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -563,7 +563,39 @@ nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); + FREE(p); +} + +static void * +nv50_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_GEOMETRY; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_GEOMPROG; +} + +static void +nv50_gp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -585,17 +617,21 @@ nv50_set_clip_state(struct pipe_context *pipe, static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv50_context *nv50 = nv50_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv50->constbuf[PIPE_SHADER_VERTEX] = buf; 
nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; nv50->dirty |= NV50_NEW_FRAGPROG_CB; + } else + if (shader == PIPE_SHADER_GEOMETRY) { + nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf; + nv50->dirty |= NV50_NEW_GEOMPROG_CB; } } @@ -696,6 +732,10 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.bind_fs_state = nv50_fp_state_bind; nv50->pipe.delete_fs_state = nv50_fp_state_delete; + nv50->pipe.create_gs_state = nv50_gp_state_create; + nv50->pipe.bind_gs_state = nv50_gp_state_bind; + nv50->pipe.delete_gs_state = nv50_gp_state_delete; + nv50->pipe.set_blend_color = nv50_set_blend_color; nv50->pipe.set_clip_state = nv50_set_clip_state; nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f83232f43cf..956da9b304c 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -199,6 +199,8 @@ nv50_state_emit(struct nv50_context *nv50) nv50->state.dirty |= NV50_NEW_VERTPROG; if (nv50->state.fragprog) nv50->state.dirty |= NV50_NEW_FRAGPROG; + if (nv50->state.geomprog) + nv50->state.dirty |= NV50_NEW_GEOMPROG; if (nv50->state.rast) nv50->state.dirty |= NV50_NEW_RASTERIZER; if (nv50->state.blend_colour) @@ -228,9 +230,14 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog) + so_emit(chan, nv50->state.geomprog); if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) - so_emit(chan, nv50->state.programs); + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) + so_emit(chan, nv50->state.fp_linkage); + if ((nv50->state.dirty & (NV50_NEW_VERTPROG | 
NV50_NEW_GEOMPROG)) + && nv50->state.gp_linkage) + so_emit(chan, nv50->state.gp_linkage); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -267,6 +274,9 @@ nv50_state_flush_notify(struct nouveau_channel *chan) so_emit_reloc_markers(chan, nv50->state.fragprog); so_emit_reloc_markers(chan, nv50->state.vtxbuf); so_emit_reloc_markers(chan, nv50->screen->static_init); + + if (nv50->state.instbuf) + so_emit_reloc_markers(chan, nv50->state.instbuf); } boolean @@ -291,9 +301,15 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB)) + nv50_geomprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) - nv50_linkage_validate(nv50); + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) + nv50_fp_linkage_validate(nv50); + + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG)) + nv50_gp_linkage_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); @@ -400,8 +416,9 @@ viewport_uptodate: for (i = 0; i < PIPE_SHADER_TYPES; ++i) nr += nv50->sampler_nr[i]; - so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES - + nr * 8, PIPE_SHADER_TYPES * 2); + so = so_new(1 + 5 * PIPE_SHADER_TYPES, + 1 + 19 * PIPE_SHADER_TYPES + nr * 8, + PIPE_SHADER_TYPES * 2); nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index bef548b7286..871536dca9b 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -155,7 +155,7 @@ static boolean nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so, unsigned p) { - static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 }; + 
static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2, 1 }; struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_grobj *tesla = nv50->screen->tesla; diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f2e510fba61..bfb1b34d27a 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -40,6 +40,8 @@ nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned); static boolean nv50_push_arrays(struct nv50_context *, unsigned, unsigned); +#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16) + static INLINE unsigned nv50_prim(unsigned mode) { @@ -55,6 +57,14 @@ nv50_prim(unsigned mode) case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + case PIPE_PRIM_LINES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY; default: break; } @@ -152,6 +162,309 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) return (hw_type | hw_size); } +/* For instanced drawing from user buffers, hitting the FIFO repeatedly + * with the same vertex data is probably worse than uploading all data. 
+ */ +static boolean +nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i) +{ + struct nv50_screen *nscreen = nv50->screen; + struct pipe_screen *pscreen = &nscreen->base.base; + struct pipe_buffer *buf = nscreen->strm_vbuf[i]; + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; + uint8_t *src; + unsigned size = align(vb->buffer->size, 4096); + + if (buf && buf->size < size) + pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL); + + if (!nscreen->strm_vbuf[i]) { + nscreen->strm_vbuf[i] = pipe_buffer_create( + pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size); + buf = nscreen->strm_vbuf[i]; + } + + src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if (!src) + return FALSE; + src += vb->buffer_offset; + + size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */ + if (vb->buffer_offset + size > vb->buffer->size) + size = vb->buffer->size - vb->buffer_offset; + + pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src); + pipe_buffer_unmap(pscreen, vb->buffer); + + vb->buffer = buf; /* don't pipe_reference, this is a private copy */ + return TRUE; +} + +static void +nv50_upload_user_vbufs(struct nv50_context *nv50) +{ + unsigned i; + + if (nv50->vbo_fifo) + nv50->dirty |= NV50_NEW_ARRAYS; + if (!(nv50->dirty & NV50_NEW_ARRAYS)) + return; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) { + if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX) + continue; + nv50_upload_vtxbuf(nv50, i); + } +} + +static void +nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + float v[4]; + + util_format_read_4f(nv50->vtxelt[i].src_format, + v, 0, data, 0, 0, 0, 1, 1); + + switch (nv50->vtxelt[i].nr_components) { + case 4: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + OUT_RINGf (chan, v[2]); + OUT_RINGf (chan, v[3]); + break; + case 3: + 
BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + OUT_RINGf (chan, v[2]); + break; + case 2: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + break; + case 1: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1); + OUT_RINGf (chan, v[0]); + break; + default: + assert(0); + break; + } +} + +static unsigned +init_per_instance_arrays_immd(struct nv50_context *nv50, + unsigned startInstance, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_bo *bo; + unsigned i, b, count = 0; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + ++count; + b = nv50->vtxelt[i].vertex_buffer_index; + + pos[i] = nv50->vtxelt[i].src_offset + + nv50->vtxbuf[b].buffer_offset + + startInstance * nv50->vtxbuf[b].stride; + step[i] = startInstance % nv50->vtxelt[i].instance_divisor; + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + if (!bo->map) + nouveau_bo_map(bo, NOUVEAU_BO_RD); + + nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); + } + + return count; +} + +static unsigned +init_per_instance_arrays(struct nv50_context *nv50, + unsigned startInstance, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct nouveau_bo *bo; + struct nouveau_stateobj *so; + unsigned i, b, count = 0; + const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + if (nv50->vbo_fifo) + return init_per_instance_arrays_immd(nv50, startInstance, + pos, step); + + so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + ++count; + b = nv50->vtxelt[i].vertex_buffer_index; + + pos[i] = nv50->vtxelt[i].src_offset + + nv50->vtxbuf[b].buffer_offset + + startInstance * nv50->vtxbuf[b].stride; + + if (!startInstance) { + 
step[i] = 0; + continue; + } + step[i] = startInstance % nv50->vtxelt[i].instance_divisor; + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); + } + + if (count && startInstance) { + so_ref (so, &nv50->state.instbuf); /* for flush notify */ + so_emit(chan, nv50->state.instbuf); + } + so_ref (NULL, &so); + + return count; +} + +static void +step_per_instance_arrays_immd(struct nv50_context *nv50, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_bo *bo; + unsigned i, b; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + if (++step[i] != nv50->vtxelt[i].instance_divisor) + continue; + b = nv50->vtxelt[i].vertex_buffer_index; + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + step[i] = 0; + pos[i] += nv50->vtxbuf[b].stride; + + nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); + } +} + +static void +step_per_instance_arrays(struct nv50_context *nv50, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct nouveau_bo *bo; + struct nouveau_stateobj *so; + unsigned i, b; + const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + if (nv50->vbo_fifo) { + step_per_instance_arrays_immd(nv50, pos, step); + return; + } + + so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + b = nv50->vtxelt[i].vertex_buffer_index; + + if (++step[i] == nv50->vtxelt[i].instance_divisor) { + step[i] = 0; + pos[i] += nv50->vtxbuf[b].stride; + } + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, pos[i], 
rl | NOUVEAU_BO_LOW, 0, 0); + } + + so_ref (so, &nv50->state.instbuf); /* for flush notify */ + so_ref (NULL, &so); + + so_emit(chan, nv50->state.instbuf); +} + +static INLINE void +nv50_unmap_vbufs(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) + if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) + nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); +} + +void +nv50_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count, + unsigned startInstance, unsigned instanceCount) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->tesla->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + unsigned i, nz_divisors; + unsigned step[16], pos[16]; + + if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) + nv50_upload_user_vbufs(nv50); + + nv50_state_validate(nv50); + + nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); + OUT_RING (chan, NV50_CB_AUX | (24 << 8)); + OUT_RING (chan, startInstance); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); + + if (nv50->vbo_fifo) + nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + } + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + + for (i = 1; i < instanceCount; i++) { + if (nz_divisors) /* any non-zero array divisors ? 
*/ + step_per_instance_arrays(nv50, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode) | (1 << 28)); + + if (nv50->vbo_fifo) + nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + } + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + } + nv50_unmap_vbufs(nv50); + + so_ref(NULL, &nv50->state.instbuf); +} + void nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -182,6 +495,8 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); + nv50_unmap_vbufs(nv50); + /* XXX: not sure what to do if ret != TRUE: flush and retry? */ assert(ret); @@ -210,7 +525,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -243,7 +558,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -268,7 +583,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 
2047 : count; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -277,6 +592,77 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, return TRUE; } +static INLINE void +nv50_draw_elements_inline(struct nv50_context *nv50, + void *map, unsigned indexSize, + unsigned start, unsigned count) +{ + switch (indexSize) { + case 1: + nv50_draw_elements_inline_u08(nv50, map, start, count); + break; + case 2: + nv50_draw_elements_inline_u16(nv50, map, start, count); + break; + case 4: + nv50_draw_elements_inline_u32(nv50, map, start, count); + break; + } +} + +void +nv50_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count, + unsigned startInstance, unsigned instanceCount) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct pipe_screen *pscreen = pipe->screen; + void *map; + unsigned i, nz_divisors; + unsigned step[16], pos[16]; + + map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + + if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) + nv50_upload_user_vbufs(nv50); + + nv50_state_validate(nv50); + + nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); + OUT_RING (chan, NV50_CB_AUX | (24 << 8)); + OUT_RING (chan, startInstance); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); + + nv50_draw_elements_inline(nv50, map, indexSize, start, count); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + + for (i = 1; i < instanceCount; ++i) { + if (nz_divisors) /* any non-zero array divisors ? 
*/ + step_per_instance_arrays(nv50, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode) | (1 << 28)); + + nv50_draw_elements_inline(nv50, map, indexSize, start, count); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + } + nv50_unmap_vbufs(nv50); + + so_ref(NULL, &nv50->state.instbuf); +} + void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -287,7 +673,6 @@ nv50_draw_elements(struct pipe_context *pipe, struct nouveau_grobj *tesla = nv50->screen->tesla; struct pipe_screen *pscreen = pipe->screen; void *map; - boolean ret; map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); @@ -300,29 +685,15 @@ nv50_draw_elements(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(mode)); - switch (indexSize) { - case 1: - ret = nv50_draw_elements_inline_u08(nv50, map, start, count); - break; - case 2: - ret = nv50_draw_elements_inline_u16(nv50, map, start, count); - break; - case 4: - ret = nv50_draw_elements_inline_u32(nv50, map, start, count); - break; - default: - assert(0); - ret = FALSE; - break; - } + + nv50_draw_elements_inline(nv50, map, indexSize, start, count); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); + nv50_unmap_vbufs(nv50); + pipe_buffer_unmap(pscreen, indexBuffer); - - /* XXX: what to do if ret != TRUE? Flush and retry? 
- */ - assert(ret); } static INLINE boolean @@ -335,23 +706,16 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, struct nouveau_stateobj *so; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_bo *bo = nouveau_bo(vb->buffer); - float *v; + float v[4]; int ret; - enum pipe_format pf = ve->src_format; - const struct util_format_description *desc; - - desc = util_format_description(pf); - assert(desc); - - if ((desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT) || - util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0) != 32) - return FALSE; ret = nouveau_bo_map(bo, NOUVEAU_BO_RD); if (ret) return FALSE; - v = (float *)(bo->map + (vb->buffer_offset + ve->src_offset)); + util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map + + (vb->buffer_offset + ve->src_offset), 0, + 0, 0, 1, 1); so = *pso; if (!so) *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); @@ -409,7 +773,7 @@ nv50_vbo_validate(struct nv50_context *nv50) !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; - if (nv50->vertprog->cfg.edgeflag_in < 16) + if (NV50_USING_LOATHED_EDGEFLAG(nv50)) nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); @@ -437,17 +801,20 @@ nv50_vbo_validate(struct nv50_context *nv50) nv50->vbo_fifo &= ~(1 << i); continue; } - so_data(vtxfmt, hw | i); if (nv50->vbo_fifo) { + so_data (vtxfmt, hw | + (ve->instance_divisor ? (1 << 4) : i)); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); so_data (vtxbuf, 0); continue; } + so_data(vtxfmt, hw | i); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); - so_data (vtxbuf, 0x20000000 | vb->stride); + so_data (vtxbuf, 0x20000000 | + (ve->instance_divisor ? 
0 : vb->stride)); so_reloc (vtxbuf, bo, vb->buffer_offset + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -485,7 +852,7 @@ typedef void (*pfn_push)(struct nouveau_channel *, void *); struct nv50_vbo_emitctx { pfn_push push[16]; - void *map[16]; + uint8_t *map[16]; unsigned stride[16]; unsigned nr_ve; unsigned vtx_dwords; @@ -523,19 +890,18 @@ nv50_map_vbufs(struct nv50_context *nv50) for (i = 0; i < nv50->vtxbuf_nr; ++i) { struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; - unsigned size, delta; + unsigned size = vb->stride * (vb->max_index + 1) + 16; if (nouveau_bo(vb->buffer)->map) continue; - size = vb->stride * (vb->max_index + 1); - delta = vb->buffer_offset; - + size = vb->stride * (vb->max_index + 1) + 16; + size = MIN2(size, vb->buffer->size); if (!size) - size = vb->buffer->size - vb->buffer_offset; + size = vb->buffer->size; if (nouveau_bo_map_range(nouveau_bo(vb->buffer), - delta, size, NOUVEAU_BO_RD)) + 0, size, NOUVEAU_BO_RD)) break; } @@ -546,16 +912,6 @@ nv50_map_vbufs(struct nv50_context *nv50) return FALSE; } -static INLINE void -nv50_unmap_vbufs(struct nv50_context *nv50) -{ - unsigned i; - - for (i = 0; i < nv50->vtxbuf_nr; ++i) - if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) - nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); -} - static void emit_b32_1(struct nouveau_channel *chan, void *data) { @@ -650,12 +1006,13 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, ve = &nv50->vtxelt[i]; vb = &nv50->vtxbuf[ve->vertex_buffer_index]; - if (!(nv50->vbo_fifo & (1 << i))) + if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor) continue; n = emit->nr_ve++; emit->stride[n] = vb->stride; - emit->map[n] = nouveau_bo(vb->buffer)->map + + emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map + + vb->buffer_offset + (start * vb->stride + ve->src_offset); desc = util_format_description(ve->src_format); @@ -745,13 +1102,12 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned 
start, unsigned count) set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx_next(chan, &emit); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -772,13 +1128,12 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -799,13 +1154,12 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -826,13 +1180,12 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 51fdb82ff34..92de297ef1d 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) /* Reasonable defaults */ caps->num_vert_fpus = 4; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? 
FALSE : TRUE; + caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4A54: caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5548: @@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D57: caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x554C: @@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D4A: caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5D4C: @@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D52: caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x4B48: @@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4B4C: caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5E4C: @@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5E4D: caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5954: @@ -212,6 +219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x791F: caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x793F: @@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x7942: caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x796C: @@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x796F: caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0633a8b8a72..28084864929 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ 
b/src/gallium/drivers/r300/r300_chipset.h @@ -40,11 +40,18 @@ struct r300_capabilities { unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Whether or not this is R400. The differences compared to their R3xx + * cousins are: + * - Extended fragment shader registers + * - Blend LTE/GTE thresholds */ + boolean is_r400; /* Whether or not this is an RV515 or newer; R500s have many differences * that require extra consideration, compared to their R3xx cousins: * - Extra bit of width and height on texture sizes * - Blend color is split across two registers - * - Universal Shader (US) block used for fragment shaders */ + * - Blend LTE/GTE thresholds + * - Universal Shader (US) block used for fragment shaders + * - FP16 blending and multisampling */ boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index af95bbe789c..94a9ab3ef3f 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -87,7 +87,7 @@ r300_is_texture_referenced(struct pipe_context *pipe, { struct pipe_buffer* buf = 0; - r300_get_texture_buffer(texture, &buf, NULL); + r300_get_texture_buffer(pipe->screen, texture, &buf, NULL); return pipe->is_buffer_referenced(pipe, buf); } @@ -110,23 +110,33 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } -#define R300_INIT_ATOM(name) \ - r300->name##_state.state = NULL; \ - r300->name##_state.emit = r300_emit_##name##_state; \ - r300->name##_state.dirty = FALSE; \ - insert_at_tail(&r300->atom_list, &r300->name##_state); +#define R300_INIT_ATOM(atomname, atomsize) \ + r300->atomname##_state.name = #atomname; \ + r300->atomname##_state.state = NULL; \ + r300->atomname##_state.size = atomsize; \ + r300->atomname##_state.emit = r300_emit_##atomname##_state; \ + 
r300->atomname##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname##_state); static void r300_setup_atoms(struct r300_context* r300) { + /* Create the actual atom list. + * + * Each atom is examined and emitted in the order it appears here, which + * can affect performance and conformance if not handled with care. + * + * Some atoms never change size, others change every emit. This is just + * an upper bound on each atom, to keep the emission machinery from + * underallocating space. */ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(ztop); - R300_INIT_ATOM(blend); - R300_INIT_ATOM(blend_color); - R300_INIT_ATOM(clip); - R300_INIT_ATOM(dsa); - R300_INIT_ATOM(rs); - R300_INIT_ATOM(scissor); - R300_INIT_ATOM(viewport); + R300_INIT_ATOM(ztop, 2); + R300_INIT_ATOM(blend, 8); + R300_INIT_ATOM(blend_color, 3); + R300_INIT_ATOM(clip, 29); + R300_INIT_ATOM(dsa, 8); + R300_INIT_ATOM(rs, 22); + R300_INIT_ATOM(scissor, 3); + R300_INIT_ATOM(viewport, 9); } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -143,8 +153,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.winsys = (struct pipe_winsys*)radeon_winsys; r300->context.screen = screen; - r300_init_debug(r300); - r300->context.destroy = r300_destroy_context; r300->context.clear = r300_clear; @@ -182,7 +190,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state.state = CALLOC_STRUCT(r300_scissor_state); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); diff --git a/src/gallium/drivers/r300/r300_context.h 
b/src/gallium/drivers/r300/r300_context.h index 5937f0e2cc5..5e33dc042ac 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,16 +30,28 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "r300_screen.h" + struct r300_context; struct r300_fragment_shader; struct r300_vertex_shader; struct r300_atom { + /* List pointers. */ struct r300_atom *prev, *next; + /* Name, for debugging. */ + const char* name; + /* Opaque state. */ void* state; + /* Emit the state to the context. */ void (*emit)(struct r300_context*, void*); + /* Upper bound on number of dwords to emit. */ + unsigned size; + /* Whether this atom should be emitted. */ boolean dirty; + /* Another dirty flag that is never automatically cleared. */ + boolean always_dirty; }; struct r300_blend_state { @@ -75,10 +87,10 @@ struct r300_rs_state { uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ - uint32_t depth_scale_front; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ - uint32_t depth_offset_front;/* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ - uint32_t depth_scale_back; /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ - uint32_t depth_offset_back; /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ + float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ + /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ + float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ + /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ @@ -106,16 +118,6 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_scissor_regs { - uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ 
-}; - -struct r300_scissor_state { - struct r300_scissor_regs framebuffer; - struct r300_scissor_regs scissor; -}; - struct r300_texture_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ @@ -188,6 +190,12 @@ struct r300_query { struct r300_query* next; }; +enum r300_buffer_tiling { + R300_BUFFER_LINEAR = 0, + R300_BUFFER_TILED, + R300_BUFFER_SQUARETILED +}; + struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -224,6 +232,9 @@ struct r300_texture { /* Registers carrying texture format data. */ struct r300_texture_state state; + + /* Buffer tiling */ + enum r300_buffer_tiling microtile, macrotile; }; struct r300_vertex_info { @@ -315,9 +326,10 @@ struct r300_context { uint32_t dirty_hw; /* Whether the TCL engine should be in bypass mode. */ boolean tcl_bypass; - - /** Combination of DBG_xxx flags */ - unsigned debug; + /* Whether polygon offset is enabled. */ + boolean polygon_offset_enabled; + /* Z buffer bit depth. */ + uint32_t zbuffer_bpp; }; /* Convenience cast wrapper. */ @@ -331,35 +343,15 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); -/* Debug functionality. */ - -/** - * Debug flags to disable/enable certain groups of debugging outputs. - * - * \note These may be rather coarse, and the grouping may be impractical. - * If you find, while debugging the driver, that a different grouping - * of these flags would be beneficial, just feel free to change them - * but make sure to update the documentation in r300_debug.c to reflect - * those changes. 
- */ -/*@{*/ -#define DBG_HELP 0x0000001 -#define DBG_FP 0x0000002 -#define DBG_VP 0x0000004 -#define DBG_CS 0x0000008 -#define DBG_DRAW 0x0000010 -#define DBG_TEX 0x0000020 -#define DBG_FALL 0x0000040 -/*@}*/ - -static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) +static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags) { - return (ctx->debug & flags) ? TRUE : FALSE; + return SCREEN_DBG_ON(r300_screen(ctx->context.screen), flags); } -static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) +static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags, + const char * fmt, ...) { - if (DBG_ON(ctx, flags)) { + if (CTX_DBG_ON(ctx, flags)) { va_list va; va_start(va, fmt); debug_vprintf(fmt, va); @@ -367,6 +359,8 @@ static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * f } } -void r300_init_debug(struct r300_context * ctx); +#define DBG_ON CTX_DBG_ON +#define DBG CTX_DBG #endif /* R300_CONTEXT_H */ + diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d142fee0502..151f72b0fe4 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -52,7 +52,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0; + int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ assert(cs_winsys->check_cs(cs_winsys, (size))) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 2a6ed54ac9b..00d4f31c2b6 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -46,7 +46,7 @@ static struct debug_option debug_options[] = { { 0, 0, 0 } }; -void r300_init_debug(struct r300_context * ctx) +void r300_init_debug(struct r300_screen * screen) { const char * options = debug_get_option("RADEON_DEBUG", 0); boolean printhint = FALSE; 
@@ -64,7 +64,7 @@ void r300_init_debug(struct r300_context * ctx) for(opt = debug_options; opt->name; ++opt) { if (!strncmp(options, opt->name, length)) { - ctx->debug |= opt->flag; + screen->debug |= opt->flag; break; } } @@ -77,11 +77,11 @@ void r300_init_debug(struct r300_context * ctx) options += length; } - if (!ctx->debug) + if (!screen->debug) printhint = TRUE; } - if (printhint || ctx->debug & DBG_HELP) { + if (printhint || screen->debug & DBG_HELP) { debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n" "to a comma-separated list of debug options. Available options are:\n"); for(opt = debug_options; opt->name; ++opt) { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 0e5533c7902..2ea9fab015d 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -41,6 +41,7 @@ void r300_emit_blend_state(struct r300_context* r300, void* state) { struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); + BEGIN_CS(8); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); @@ -419,8 +420,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | - r300_translate_colorformat(tex->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); + r300_translate_colorformat(tex->tex.format) | + R300_COLOR_TILE(tex->macrotile) | + R300_COLOR_MICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(surf->format)); @@ -443,8 +446,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, - RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | + 
R300_DEPTHMACROTILE(tex->macrotile) | + R300_DEPTHMICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); } END_CS; @@ -579,19 +584,37 @@ void r300_emit_query_end(struct r300_context* r300) void r300_emit_rs_state(struct r300_context* r300, void* state) { struct r300_rs_state* rs = (struct r300_rs_state*)state; + float scale, offset; CS_LOCALS(r300); - BEGIN_CS(22); + BEGIN_CS(18 + (rs->polygon_offset_enable ? 5 : 0)); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); OUT_CS(rs->point_minmax); OUT_CS(rs->line_control); - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 6); - OUT_CS(rs->depth_scale_front); - OUT_CS(rs->depth_offset_front); - OUT_CS(rs->depth_scale_back); - OUT_CS(rs->depth_offset_back); + + if (rs->polygon_offset_enable) { + scale = rs->depth_scale * 12; + offset = rs->depth_offset; + + switch (r300->zbuffer_bpp) { + case 16: + offset *= 4; + break; + case 24: + offset *= 2; + break; + } + + OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CS_32F(scale); + OUT_CS_32F(offset); + OUT_CS_32F(scale); + OUT_CS_32F(offset); + } + + OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); OUT_CS(rs->polygon_offset_enable); OUT_CS(rs->cull_mode); OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); @@ -641,27 +664,47 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -static void r300_emit_scissor_regs(struct r300_context* r300, - struct r300_scissor_regs* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + unsigned minx, miny, maxx, maxy; + uint32_t top_left, bottom_right; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; CS_LOCALS(r300); - BEGIN_CS(3); - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->top_left); - OUT_CS(scissor->bottom_right); - END_CS; -} + minx = miny = 0; + maxx = 
r300->framebuffer_state.width; + maxy = r300->framebuffer_state.height; -void r300_emit_scissor_state(struct r300_context* r300, void* state) -{ - struct r300_scissor_state* scissor = (struct r300_scissor_state*)state; - /* XXX argfl! */ if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { - r300_emit_scissor_regs(r300, &scissor->scissor); + minx = MAX2(minx, scissor->minx); + miny = MAX2(miny, scissor->miny); + maxx = MIN2(maxx, scissor->maxx); + maxy = MIN2(maxy, scissor->maxy); + } + + if (r300screen->caps->is_r500) { + top_left = + (minx << R300_SCISSORS_X_SHIFT) | + (miny << R300_SCISSORS_Y_SHIFT); + bottom_right = + ((maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((maxy - 1) << R300_SCISSORS_Y_SHIFT); } else { - r300_emit_scissor_regs(r300, &scissor->framebuffer); + /* Offset of 1440 in non-R500 chipsets. */ + top_left = + ((minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((miny + 1440) << R300_SCISSORS_Y_SHIFT); + bottom_right = + (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(top_left); + OUT_CS(bottom_right); + END_CS; } void r300_emit_texture(struct r300_context* r300, @@ -680,12 +723,18 @@ void r300_emit_texture(struct r300_context* r300, filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - /* determine min/max levels */ - /* the MAX_MIP level is the largest (finest) one */ - max_level = MIN2(sampler->max_lod, tex->tex.last_level); - min_level = MIN2(sampler->min_lod, max_level); - format0 |= R300_TX_NUM_LEVELS(max_level); - filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + if (tex->is_npot) { + /* NPOT textures don't support mip filter, unfortunately. + * This prevents incorrect rendering. 
*/ + filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; + } else { + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + format0 |= R300_TX_NUM_LEVELS(max_level); + filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + } BEGIN_CS(16); OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | @@ -697,8 +746,10 @@ void r300_emit_texture(struct r300_context* r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_RELOC(tex->buffer, + R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile), + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); END_CS; } @@ -764,32 +815,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -#if 0 -void r300_emit_draw_packet(struct r300_context* r300) -{ - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " - "vertex size %d\n", r300->vbo, - r300->vertex_info->vinfo.size); - /* Set the pointer to our vertex buffer. 
The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [offset into BO] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info->vinfo.size | - (r300->vertex_info->vinfo.size << 8)); - OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; -} -#endif - void r300_emit_vertex_format_state(struct r300_context* r300) { int i; @@ -992,14 +1017,23 @@ void r300_emit_dirty_state(struct r300_context* r300) struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_texture* tex; struct r300_atom* atom; - int i, dirty_tex = 0; + unsigned i, dwords = 1024; + int dirty_tex = 0; boolean invalid = FALSE; - /* Check size of CS. */ - /* Make sure we have at least 8*1024 spare dwords. */ + /* Check the required number of dwords against the space remaining in the + * current CS object. If we need more, then flush. */ + + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + dwords += atom->size; + } + } + + /* Make sure we have at least 2*1024 spare dwords. */ /* XXX It would be nice to know the number of dwords we really need to * XXX emit. */ - if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + if (!r300->winsys->check_cs(r300->winsys, dwords)) { r300->context.flush(&r300->context, 0, NULL); } @@ -1039,10 +1073,12 @@ validate: } } /* ...occlusion query buffer... */ - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; + if (r300->dirty_state & R300_NEW_QUERY) { + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } } /* ...and vertex buffer. 
*/ if (r300->vbo) { @@ -1071,7 +1107,7 @@ validate: } foreach(atom, &r300->atom_list) { - if (atom->dirty) { + if (atom->dirty || atom->always_dirty) { atom->emit(r300, atom->state); atom->dirty = FALSE; } diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc4..59819cb1061 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -37,8 +37,10 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_query *query; + struct r300_atom *atom; CS_LOCALS(r300); + (void) cs_count; /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -54,7 +56,15 @@ static void r300_flush(struct pipe_context* pipe, r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; + + /* New kitchen sink, baby. */ + foreach(atom, &r300->atom_list) { + if (atom->state) { + atom->dirty = TRUE; + } + } } + /* reset flushed query */ foreach(query, &r300->query_list) { query->flushed = TRUE; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 034bfc15cf9..361813891fb 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R300_TX_OFFSET_5 0x4554 #define R300_TX_OFFSET_6 0x4558 #define R300_TX_OFFSET_7 0x455C - /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) -# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) +# define R300_TXO_MACRO_TILE_TILED (1 << 2) +# define R300_TXO_MACRO_TILE(x) ((x) << 2) # define R300_TXO_MICRO_TILE_LINEAR (0 << 3) -# define R300_TXO_MICRO_TILE (1 << 3) -# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE_TILED (1 << 3) +# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE(x) ((x) << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 - /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 @@ -2283,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_TILE(x) ((x) << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) # define R300_COLOR_MICROTILE_ENABLE (1 << 17) # define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_MICROTILE(x) ((x) << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) @@ -2544,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_DEPTHPITCH_MASK 0x00003FFC # define R300_DEPTHMACROTILE_DISABLE (0 << 16) # define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMACROTILE(x) ((x) << 16) # define R300_DEPTHMICROTILE_LINEAR (0 << 17) # define R300_DEPTHMICROTILE_TILED (1 << 17) # define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHMICROTILE(x) ((x) << 17) # define R300_DEPTHENDIAN_NO_SWAP (0 << 18) # define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) # define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index ee43421cdb7..90de062bcda 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -114,6 +114,44 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +static void r300_emit_draw_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer; + unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float); + unsigned i; + uint32_t* map; + CS_LOCALS(r300); + + map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo, + start * vertex_size, count * vertex_size, + PIPE_BUFFER_USAGE_CPU_READ); + + BEGIN_CS(10 + count * vertex_size); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + r300_translate_primitive(mode)); + //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size); + for (i = 0; i < count * vertex_size; i++) { + if (i % vertex_size == 0) { + //debug_printf("r300: -- vert --\n"); + } + //debug_printf("r300: 0x%08x\n", *map); + OUT_CS(*map); + map++; + } + END_CS; + + 
pipe_buffer_unmap(r300->context.screen, vbo); +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -207,17 +245,49 @@ validate: return TRUE; } +static void r300_shorten_ubyte_elts(struct r300_context* r300, + struct pipe_buffer** elts, + unsigned count) +{ + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* new_elts; + unsigned char *in_map; + unsigned short *out_map; + unsigned i; + + new_elts = screen->buffer_create(screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + 2 * count); + + in_map = pipe_buffer_map(screen, *elts, PIPE_BUFFER_USAGE_CPU_READ); + out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE); + + for (i = 0; i < count; i++) { + *out_map = (unsigned short)*in_map; + in_map++; + out_map++; + } + + pipe_buffer_unmap(screen, *elts); + pipe_buffer_unmap(screen, new_elts); + + *elts = new_elts; +} + /* This is the fast-path drawing & emission for HW TCL. 
*/ void r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (!u_trim_pipe_prim(mode, &count)) { return; @@ -236,13 +306,18 @@ void r300_draw_range_elements(struct pipe_context* pipe, return; } + if (indexSize == 1) { + r300_shorten_ubyte_elts(r300, &indexBuffer, count); + indexSize = 2; + } + if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return; + goto cleanup; } if (!r300->winsys->validate(r300->winsys)) { - return; + goto cleanup; } r300_emit_dirty_state(r300); @@ -251,6 +326,11 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); + +cleanup: + if (indexBuffer != orgIndexBuffer) { + pipe->screen->buffer_destroy(indexBuffer); + } } /* Simple helpers for context setup. Should probably be moved to util. 
*/ @@ -264,7 +344,7 @@ void r300_draw_elements(struct pipe_context* pipe, } void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) + unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); @@ -287,9 +367,12 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_dirty_state(r300); - r300_emit_aos(r300, start); - - r300_emit_draw_arrays(r300, mode, count); + if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) { + r300_emit_draw_immediate(r300, mode, start, count); + } else { + r300_emit_aos(r300, start); + r300_emit_draw_arrays(r300, mode, count); + } } /**************************************************************************** diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 287664b1d20..67325c6b80a 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -183,10 +183,20 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) } } -static boolean check_tex_format(enum pipe_format format, uint32_t usage, - boolean is_r500) +static boolean r300_is_format_supported(struct pipe_screen* screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned usage, + unsigned geom_flags) { uint32_t retval = 0; + boolean is_r500 = r300_screen(screen)->caps->is_r500; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + debug_printf("r300: Implementation error: Received bogus texture " + "target %d in %s\n", target, __FUNCTION__); + return FALSE; + } switch (format) { /* Supported formats. 
*/ @@ -247,28 +257,13 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: - debug_printf("r300: Note: Got unsupported format: %s in %s\n", - pf_name(format), __FUNCTION__); + SCREEN_DBG(r300_screen(screen), DBG_TEX, + "r300: Note: Got unsupported format: %s in %s\n", + pf_name(format), __FUNCTION__); return FALSE; - /* XXX These don't even exist - case PIPE_FORMAT_A32R32G32B32: - case PIPE_FORMAT_A16R16G16B16: */ - /* XXX What the deuce is UV88? (r3xx accel page 14) - debug_printf("r300: Warning: Got unimplemented format: %s in %s\n", - pf_name(format), __FUNCTION__); - return FALSE; */ - - /* XXX Supported yet unimplemented r5xx formats: */ - /* XXX Again, what is UV1010 this time? (r5xx accel page 148) */ - /* XXX Even more that don't exist - case PIPE_FORMAT_A10R10G10B10_UNORM: - case PIPE_FORMAT_A2R10G10B10_UNORM: - case PIPE_FORMAT_I10_UNORM: - debug_printf( - "r300: Warning: Got unimplemented r500 format: %s in %s\n", - pf_name(format), __FUNCTION__); - return FALSE; */ + /* XXX Add all remaining gallium-supported formats, + * see util/u_format.csv. */ default: /* Unknown format... 
*/ @@ -286,30 +281,6 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, return (retval >= usage); } -static boolean r300_is_format_supported(struct pipe_screen* pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, - unsigned geom_flags) -{ - switch (target) { - case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */ - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_3D: - case PIPE_TEXTURE_CUBE: - return check_tex_format(format, tex_usage, - r300_screen(pscreen)->caps->is_r500); - - default: - debug_printf("r300: Fatal: This is not a format target: %d\n", - target); - assert(0); - break; - } - - return FALSE; -} - static struct pipe_transfer* r300_get_tex_transfer(struct pipe_screen *screen, struct pipe_texture *texture, @@ -319,6 +290,7 @@ r300_get_tex_transfer(struct pipe_screen *screen, { struct r300_texture *tex = (struct r300_texture *)texture; struct r300_transfer *trans; + struct r300_screen *rscreen = r300_screen(screen); unsigned offset; offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */ @@ -330,11 +302,8 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; - trans->transfer.stride = r300_texture_get_stride(tex, level); + trans->transfer.stride = r300_texture_get_stride(rscreen, tex, level); trans->transfer.usage = usage; - - /* XXX not sure whether it's required to set these two, - the driver doesn't use them */ trans->transfer.zslice = zslice; trans->transfer.face = face; @@ -396,6 +365,7 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) caps->num_frag_pipes = radeon_winsys->gb_pipes; caps->num_z_pipes = radeon_winsys->z_pipes; + r300_init_debug(r300screen); r300_parse_chipset(caps); r300screen->caps = caps; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 2217988addd..580fda3984e 100644 --- 
a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -35,6 +35,9 @@ struct r300_screen { /* Chipset capabilities */ struct r300_capabilities* caps; + + /** Combination of DBG_xxx flags */ + unsigned debug; }; struct r300_transfer { @@ -60,4 +63,44 @@ r300_transfer(struct pipe_transfer* transfer) /* Creates a new r300 screen. */ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); +/* Debug functionality. */ + +/** + * Debug flags to disable/enable certain groups of debugging outputs. + * + * \note These may be rather coarse, and the grouping may be impractical. + * If you find, while debugging the driver, that a different grouping + * of these flags would be beneficial, just feel free to change them + * but make sure to update the documentation in r300_debug.c to reflect + * those changes. + */ +/*@{*/ +#define DBG_HELP 0x0000001 +#define DBG_FP 0x0000002 +#define DBG_VP 0x0000004 +#define DBG_CS 0x0000008 +#define DBG_DRAW 0x0000010 +#define DBG_TEX 0x0000020 +#define DBG_FALL 0x0000040 +/*@}*/ + +static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) +{ + return (screen->debug & flags) ? TRUE : FALSE; +} + +static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags, + const char * fmt, ...) 
+{ + if (SCREEN_DBG_ON(screen, flags)) { + va_list va; + va_start(va, fmt); + debug_vprintf(fmt, va); + va_end(va); + } +} + +void r300_init_debug(struct r300_screen* ctx); + #endif /* R300_SCREEN_H */ + diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 78764ddc98e..e2ec0bc5bd2 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -340,6 +340,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_color_state* state = (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; @@ -355,6 +356,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); + r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2; r300->blend_color_state.dirty = TRUE; } @@ -365,11 +367,14 @@ static void r300_set_clip_state(struct pipe_context* pipe, if (r300_screen(pipe->screen)->caps->has_tcl) { memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); - r300->clip_state.dirty = TRUE; + r300->clip_state.size = 29; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); + r300->clip_state.size = 2; } + + r300->clip_state.dirty = TRUE; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -462,8 +467,10 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); r300->dsa_state.state = state; + r300->dsa_state.size = r300screen->caps->is_r500 ? 
8 : 6; r300->dsa_state.dirty = TRUE; } @@ -474,36 +481,12 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_scissor_regs(const struct pipe_scissor_state* state, - struct r300_scissor_regs *scissor, - boolean is_r500) -{ - if (is_r500) { - scissor->top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - scissor->top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } -} - static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - struct pipe_scissor_state pscissor; + uint32_t zbuffer_bpp = 0; if (r300->draw) { draw_flush(r300->draw); @@ -511,19 +494,29 @@ static void r300->framebuffer_state = *state; - /* XXX Arg. This is silly. */ - pscissor.minx = pscissor.miny = 0; - pscissor.maxx = state->width; - pscissor.maxy = state->height; - r300_set_scissor_regs(&pscissor, &scissor->framebuffer, - r300_screen(r300->context.screen)->caps->is_r500); - /* Don't rely on the order of states being set for the first time. */ r300->dirty_state |= R300_NEW_FRAMEBUFFERS; r300->blend_state.dirty = TRUE; r300->dsa_state.dirty = TRUE; r300->scissor_state.dirty = TRUE; + + /* Polyfon offset depends on the zbuffer bit depth. 
*/ + if (state->zsbuf && r300->polygon_offset_enabled) { + switch (util_format_get_blocksize(state->zsbuf->texture->format)) { + case 2: + zbuffer_bpp = 16; + break; + case 4: + zbuffer_bpp = 24; + break; + } + + if (r300->zbuffer_bpp != zbuffer_bpp) { + r300->zbuffer_bpp = zbuffer_bpp; + r300->rs_state.dirty = TRUE; + } + } } /* Create fragment shader state. */ @@ -627,9 +620,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; - /* XXX I think there is something wrong with the polygon mode, - * XXX re-test when r300g is in a better shape */ - /* Enable polygon mode */ if (state->fill_cw != PIPE_POLYGON_MODE_FILL || state->fill_ccw != PIPE_POLYGON_MODE_FILL) { @@ -682,10 +672,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, } if (rs->polygon_offset_enable) { - rs->depth_offset_front = rs->depth_offset_back = - fui(state->offset_units); - rs->depth_scale_front = rs->depth_scale_back = - fui(state->offset_scale); + rs->depth_offset = state->offset_units; + rs->depth_scale = state->offset_scale; } if (state->line_stipple_enable) { @@ -717,7 +705,13 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + if (rs) { + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw; + } else { + r300->tcl_bypass = FALSE; + r300->polygon_offset_enabled = FALSE; + } r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; @@ -864,11 +858,9 @@ static void r300_set_scissor_state(struct pipe_context* pipe, const struct pipe_scissor_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - r300_set_scissor_regs(state, &scissor->scissor, - 
r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->scissor_state.state, state, + sizeof(struct pipe_scissor_state)); r300->scissor_state.dirty = TRUE; } @@ -1015,22 +1007,22 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) static void r300_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct r300_context* r300 = r300_context(pipe); void *mapped; - if (buf == NULL || buf->buffer->size == 0 || - (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) + if (buf == NULL || buf->size == 0 || + (mapped = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) { r300->shader_constants[shader].count = 0; return; } - assert((buf->buffer->size % 4 * sizeof(float)) == 0); - memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size); - r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pipe->screen, buf->buffer); + assert((buf->size % 4 * sizeof(float)) == 0); + memcpy(r300->shader_constants[shader].constants, mapped, buf->size); + r300->shader_constants[shader].count = buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pipe->screen, buf); if (shader == PIPE_SHADER_VERTEX) r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 192846411ba..99c2720897f 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -350,7 +350,8 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize colors. 
*/ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ rX00_rs_col(rs, col_count, i, FALSE); diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 35be00e1b01..e2180b33b77 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -537,6 +537,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); + unsigned swizzle[4], i; assert(format); @@ -547,11 +548,26 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { return 0; } - return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | - (0xf << R300_WRITE_ENA_SHIFT)); + /* Swizzles for 8bits formats are in the reversed order, not sure why. 
*/ + if (desc->channel[0].size == 8) { + for (i = 0; i < 4; i++) { + if (desc->swizzle[i] <= 3) { + swizzle[i] = 3 - desc->swizzle[i]; + } else { + swizzle[i] = desc->swizzle[i]; + } + } + } else { + for (i = 0; i < 4; i++) { + swizzle[i] = desc->swizzle[i]; + } + } + + return ((swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT)); } #endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index f25f3ca217d..47d7e60a40d 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -79,7 +79,8 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + + (caps->family >= CHIP_FAMILY_RV350 ? 
4 : 0)); if (caps->has_tcl) { /*Flushing PVS is required before the VAP_GB registers can be changed*/ @@ -115,10 +116,12 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - if (caps->is_r500) { + + if (caps->family >= CHIP_FAMILY_RV350) { OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 9a96206a4dc..1f73f74c268 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -30,10 +30,23 @@ #include "r300_texture.h" #include "r300_screen.h" -static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) +#define TILE_WIDTH 0 +#define TILE_HEIGHT 1 + +static const unsigned microblock_table[5][3][2] = { + /*linear tiled square-tiled */ + {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ + {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +}; + +static void r300_setup_texture_state(struct r300_screen* screen, struct r300_texture* tex) { struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; + boolean is_r500 = screen->caps->is_r500; state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); @@ -67,8 +80,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) } assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048)); - debug_printf("r300: Set texture state (%dx%d, %d levels)\n", - pt->width0, 
pt->height0, pt->last_level); + SCREEN_DBG(screen, DBG_TEX, "r300: Set texture state (%dx%d, %d levels)\n", + pt->width0, pt->height0, pt->last_level); } unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, @@ -92,33 +105,78 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } /** + * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile + * of the given texture. + */ +static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim) +{ + unsigned pixsize, tile_size; + + pixsize = util_format_get_blocksize(tex->tex.format); + tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] * + (tex->macrotile == R300_BUFFER_TILED ? 8 : 1); + + assert(tile_size); + return tile_size; +} + +/** * Return the stride, in bytes, of the texture images of the given texture * at the given level. */ -unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) +unsigned r300_texture_get_stride(struct r300_screen* screen, + struct r300_texture* tex, unsigned level) { + unsigned tile_width, width; + if (tex->stride_override) return tex->stride_override; + /* Check the level. 
*/ if (level > tex->tex.last_level) { - debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, - level, tex->tex.last_level); + SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", + __FUNCTION__, level, tex->tex.last_level); return 0; } - return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); + width = u_minify(tex->tex.width0, level); + + if (!util_format_is_compressed(tex->tex.format)) { + tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH); + width = align(width, tile_width); + return util_format_get_stride(tex->tex.format, width); + } else { + return align(util_format_get_stride(tex->tex.format, width), 32); + } +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, + unsigned level) +{ + unsigned height, tile_height; + + height = u_minify(tex->tex.height0, level); + + if (!util_format_is_compressed(tex->tex.format)) { + tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT); + height = align(height, tile_height); + } + + return util_format_get_nblocksy(tex->tex.format, height); } -static void r300_setup_miptree(struct r300_texture* tex) +static void r300_setup_miptree(struct r300_screen* screen, + struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, layer_size; - int i; + unsigned stride, size, layer_size, nblocksy, i; - for (i = 0; i <= base->last_level; i++) { - unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); + SCREEN_DBG(screen, DBG_TEX, "r300: Making miptree for texture, format %s\n", + pf_name(base->format)); - stride = r300_texture_get_stride(tex, i); + for (i = 0; i <= base->last_level; i++) { + stride = r300_texture_get_stride(screen, tex, i); + nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) @@ -131,10 +189,10 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->layer_size[i] = layer_size; tex->pitch[i] = 
stride / util_format_get_blocksize(base->format); - debug_printf("r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes)\n", + SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes) %d bytes total\n", i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride); + u_minify(base->depth0, i), stride, tex->size); } } @@ -150,6 +208,7 @@ static struct pipe_texture* const struct pipe_texture* template) { struct r300_texture* tex = CALLOC_STRUCT(r300_texture); + struct r300_screen* rscreen = r300_screen(screen); if (!tex) { return NULL; @@ -160,10 +219,10 @@ static struct pipe_texture* tex->tex.screen = screen; r300_setup_flags(tex); - r300_setup_miptree(tex); - r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); + r300_setup_miptree(rscreen, tex); + r300_setup_texture_state(rscreen, tex); - tex->buffer = screen->buffer_create(screen, 1024, + tex->buffer = screen->buffer_create(screen, 2048, PIPE_BUFFER_USAGE_PIXEL, tex->size); @@ -227,6 +286,7 @@ static struct pipe_texture* struct pipe_buffer* buffer) { struct r300_texture* tex; + struct r300_screen* rscreen = r300_screen(screen); /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || @@ -248,7 +308,7 @@ static struct pipe_texture* tex->pitch[0] = *stride / util_format_get_blocksize(base->format); r300_setup_flags(tex); - r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); + r300_setup_texture_state(rscreen, tex); pipe_buffer_reference(&tex->buffer, buffer); @@ -315,7 +375,8 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen) screen->video_surface_destroy= r300_video_surface_destroy; } -boolean r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride) { @@ -327,7 +388,7 @@ boolean r300_get_texture_buffer(struct 
pipe_texture* texture, pipe_buffer_reference(buffer, tex->buffer); if (stride) { - *stride = r300_texture_get_stride(tex, 0); + *stride = r300_texture_get_stride(r300_screen(screen), tex, 0); } return TRUE; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 55ceb1a5136..1be1e6843c2 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -31,7 +31,8 @@ struct r300_texture; void r300_init_screen_texture_functions(struct pipe_screen* screen); -unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); +unsigned r300_texture_get_stride(struct r300_screen* screen, + struct r300_texture* tex, unsigned level); unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, unsigned zslice, unsigned face); @@ -115,7 +116,8 @@ r300_video_surface(struct pipe_video_surface *pvs) #ifndef R300_WINSYS_H -boolean r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 68aef70872e..9fbb830047f 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -124,7 +124,8 @@ static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { hwfmt[1] |= R300_INPUT_CNTL_COLOR; hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } @@ -182,7 +183,8 @@ static void r300_stream_locations_notcl( /* Colors. 
*/ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { stream_loc[tabi++] = 2 + i; } } @@ -259,7 +261,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (outputs->color[i] != ATTR_UNUSED) { c->code->outputs[outputs->color[i]] = reg++; - } else if (any_bcolor_used) { + } else if (any_bcolor_used || + outputs->color[1] != ATTR_UNUSED) { reg++; } } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 1ae6de70fee..bdb8b54bab6 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -40,7 +40,8 @@ extern "C" { struct pipe_context* r300_create_context(struct pipe_screen* screen, struct radeon_winsys* radeon_winsys); -boolean r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index bcb887a0b26..e4ac49fa85f 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -32,6 +32,7 @@ C_SOURCES = \ sp_tex_tile_cache.c \ sp_tile_cache.c \ sp_surface.c \ - sp_video_context.c + sp_video_context.c \ + sp_winsys.c include ../../Makefile.template diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index aac9edf44e6..3042e556c64 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -34,6 +34,7 @@ softpipe = env.ConvenienceLibrary( 'sp_texture.c', 'sp_tile_cache.c', 'sp_video_context.c', + 'sp_winsys.c' ]) Export('softpipe') diff --git a/src/gallium/drivers/softpipe/sp_context.c 
b/src/gallium/drivers/softpipe/sp_context.c index f3ac6760db5..8e017939402 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -113,8 +113,8 @@ softpipe_destroy( struct pipe_context *pipe ) } for (i = 0; i < Elements(softpipe->constants); i++) { - if (softpipe->constants[i].buffer) { - pipe_buffer_reference(&softpipe->constants[i].buffer, NULL); + if (softpipe->constants[i]) { + pipe_buffer_reference(&softpipe->constants[i], NULL); } } @@ -256,6 +256,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; + softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; + softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 73fa744f9d4..da673c57ada 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -63,7 +63,7 @@ struct softpipe_context { /** Other rendering state */ struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 03d35fb3cb5..03b58d2fb72 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -52,18 +52,18 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) uint i, vssize, gssize; for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (sp->constants[i].buffer && 
sp->constants[i].buffer->size) - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + if (sp->constants[i] && sp->constants[i]->size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i], PIPE_BUFFER_USAGE_CPU_READ); } - if (sp->constants[PIPE_SHADER_VERTEX].buffer) - vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size; + if (sp->constants[PIPE_SHADER_VERTEX]) + vssize = sp->constants[PIPE_SHADER_VERTEX]->size; else vssize = 0; - if (sp->constants[PIPE_SHADER_GEOMETRY].buffer) - gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size; + if (sp->constants[PIPE_SHADER_GEOMETRY]) + gssize = sp->constants[PIPE_SHADER_GEOMETRY]->size; else gssize = 0; @@ -91,26 +91,48 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0); for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->buffer_unmap(ws, sp->constants[i].buffer); + if (sp->constants[i] && sp->constants[i]->size) + ws->buffer_unmap(ws, sp->constants[i]); sp->mapped_constants[i] = NULL; } } +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + */ +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - softpipe_draw_elements(pipe, NULL, 0, mode, start, count); + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } -/** - * Draw vertex arrays, with optional indexing. 
- * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - */ void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, @@ -119,6 +141,91 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + min_index, + max_index, + mode, + start, + count, + 0, + 1); +} + + +void +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); +} + +void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; unsigned i; @@ -128,45 +235,48 @@ softpipe_draw_range_elements(struct pipe_context *pipe, 
sp->reduced_api_prim = u_reduced_prim(mode); - if (sp->dirty) - softpipe_update_derived( sp ); + if (sp->dirty) { + softpipe_update_derived(sp); + } softpipe_map_transfers(sp); softpipe_map_constant_buffers(sp); - /* - * Map vertex buffers - */ + /* Map vertex buffers */ for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf - = pipe_buffer_map(pipe->screen, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + void *buf; + + buf = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(draw, indexSize, - min_index, - max_index, + void *mapped_indexes; + + mapped_indexes = pipe_buffer_map(pipe->screen, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, + indexSize, + minIndex, + maxIndex, mapped_indexes); - } - else { + } else { /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, start, - start + count - 1, NULL); + draw_set_mapped_element_buffer_range(draw, + 0, + start, + start + count - 1, + NULL); } /* draw! 
*/ - draw_arrays(draw, mode, start, count); + draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); - /* - * unmap vertex/index buffers - will cause draw module to flush - */ + /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); @@ -176,22 +286,8 @@ softpipe_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); } - /* Note: leave drawing surfaces mapped */ softpipe_unmap_constant_buffers(sp); sp->dirty_render_cache = TRUE; } - - -void -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - softpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); -} diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 7f573aef3c3..5812d1eefeb 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -526,6 +526,7 @@ static void sp_vbuf_destroy(struct vbuf_render *vbr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + align_free(cvbr->vertex_buffer); sp_setup_destroy_context(cvbr->setup); FREE(cvbr); } @@ -541,7 +542,6 @@ sp_create_vbuf_backend(struct softpipe_context *sp) assert(sp->draw); - cvbr->base.max_indices = SP_MAX_VBUF_INDEXES; cvbr->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 9b18dac67bd..7f244c4fd49 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -139,7 +139,7 @@ void softpipe_set_clip_state( struct pipe_context *, void softpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct 
pipe_constant_buffer *buf); + struct pipe_buffer *buf); void *softpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); @@ -200,6 +200,24 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void softpipe_map_transfers(struct softpipe_context *sp); void diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index aa12bb215a8..b7ed4441b43 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -159,7 +159,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) void softpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -169,8 +169,7 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, draw_flush(softpipe->draw); /* note: reference counting */ - pipe_buffer_reference(&softpipe->constants[shader].buffer, - buf ? 
buf->buffer : NULL); + pipe_buffer_reference(&softpipe->constants[shader], buf); softpipe->dirty |= SP_NEW_CONSTANTS; } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index a9436a33942..fae72c81aa1 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -57,13 +57,8 @@ softpipe_texture_layout(struct pipe_screen *screen, unsigned width = pt->width0; unsigned height = pt->height0; unsigned depth = pt->depth0; - unsigned buffer_size = 0; - pt->width0 = width; - pt->height0 = height; - pt->depth0 = depth; - for (level = 0; level <= pt->last_level; level++) { spt->stride[level] = util_format_get_stride(pt->format, width); diff --git a/src/gallium/drivers/softpipe/sp_winsys.c b/src/gallium/drivers/softpipe/sp_winsys.c new file mode 100644 index 00000000000..8169071dc9f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_winsys.c @@ -0,0 +1,245 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * Malloc softpipe winsys. Uses malloc for all memory allocations. + * + * @author Keith Whitwell + * @author Brian Paul + * @author Jose Fonseca + */ + + +#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */ +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "softpipe/sp_winsys.h" + + +struct st_softpipe_buffer +{ + struct pipe_buffer base; + boolean userBuffer; /** Is this a user-space buffer? 
*/ + void *data; + void *mapped; +}; + + +/** Cast wrapper */ +static INLINE struct st_softpipe_buffer * +st_softpipe_buffer( struct pipe_buffer *buf ) +{ + return (struct st_softpipe_buffer *)buf; +} + + +static void * +st_softpipe_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags) +{ + struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); + st_softpipe_buf->mapped = st_softpipe_buf->data; + return st_softpipe_buf->mapped; +} + + +static void +st_softpipe_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); + st_softpipe_buf->mapped = NULL; +} + + +static void +st_softpipe_buffer_destroy(struct pipe_buffer *buf) +{ + struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf); + + if (oldBuf->data) { + if (!oldBuf->userBuffer) + align_free(oldBuf->data); + + oldBuf->data = NULL; + } + + FREE(oldBuf); +} + + +static void +st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, + struct pipe_surface *surf, + void *context_private) +{ +} + + + +static const char * +st_softpipe_get_name(struct pipe_winsys *winsys) +{ + return "softpipe"; +} + + +static struct pipe_buffer * +st_softpipe_buffer_create(struct pipe_winsys *winsys, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer); + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + + buffer->data = align_malloc(size, alignment); + + return &buffer->base; +} + + +/** + * Create buffer which wraps user-space data. 
+ */ +static struct pipe_buffer * +st_softpipe_user_buffer_create(struct pipe_winsys *winsys, + void *ptr, + unsigned bytes) +{ + struct st_softpipe_buffer *buffer; + + buffer = CALLOC_STRUCT(st_softpipe_buffer); + if(!buffer) + return NULL; + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.size = bytes; + buffer->userBuffer = TRUE; + buffer->data = ptr; + + return &buffer->base; +} + + +static struct pipe_buffer * +st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned tex_usage, + unsigned *stride) +{ + const unsigned alignment = 64; + unsigned nblocksy; + + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); + + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); +} + + +static void +st_softpipe_fence_reference(struct pipe_winsys *winsys, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + + +static int +st_softpipe_fence_signalled(struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static int +st_softpipe_fence_finish(struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static void +st_softpipe_destroy(struct pipe_winsys *winsys) +{ + FREE(winsys); +} + + +struct pipe_screen * +softpipe_create_screen_malloc(void) +{ + static struct pipe_winsys *winsys; + struct pipe_screen *screen; + + winsys = CALLOC_STRUCT(pipe_winsys); + if(!winsys) + return NULL; + + winsys->destroy = st_softpipe_destroy; + + winsys->buffer_create = st_softpipe_buffer_create; + winsys->user_buffer_create = st_softpipe_user_buffer_create; + winsys->buffer_map = st_softpipe_buffer_map; + winsys->buffer_unmap = st_softpipe_buffer_unmap; + winsys->buffer_destroy = st_softpipe_buffer_destroy; + + winsys->surface_buffer_create = st_softpipe_surface_buffer_create; 
+ + winsys->fence_reference = st_softpipe_fence_reference; + winsys->fence_signalled = st_softpipe_fence_signalled; + winsys->fence_finish = st_softpipe_fence_finish; + + winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer; + winsys->get_name = st_softpipe_get_name; + + screen = softpipe_create_screen(winsys); + if(!screen) + st_softpipe_destroy(winsys); + + return screen; +} diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index f203ded29ee..3042e01a05c 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -49,10 +49,17 @@ struct pipe_buffer; struct pipe_context *softpipe_create( struct pipe_screen * ); +/** + * Create a softpipe screen that uses the + * given winsys for allocating buffers. + */ +struct pipe_screen *softpipe_create_screen( struct pipe_winsys * ); -struct pipe_screen * -softpipe_create_screen(struct pipe_winsys *); - +/** + * Create a softpipe screen that uses + * regular malloc to create all its buffers. 
+ */ +struct pipe_screen *softpipe_create_screen_malloc(void); boolean softpipe_get_texture_buffer( struct pipe_texture *texture, diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c index 10e7a121892..ca2c7c49d72 100644 --- a/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/src/gallium/drivers/svga/svga_pipe_constants.c @@ -49,7 +49,7 @@ struct svga_constbuf static void svga_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct svga_context *svga = svga_context(pipe); @@ -57,7 +57,7 @@ static void svga_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); pipe_buffer_reference( &svga->curr.cb[shader], - buf->buffer ); + buf ); if (shader == PIPE_SHADER_FRAGMENT) svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index d29f3762d2b..ec2886348b9 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -81,8 +81,10 @@ static enum pipe_error compile_fs( struct svga_context *svga, } result->id = util_bitmask_add(svga->fs_bm); - if(result->id == UTIL_BITMASK_INVALID_INDEX) + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto fail; + } ret = SVGA3D_DefineShader(svga->swc, result->id, diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index ae1e77e7d44..e7e6c084321 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -71,7 +71,7 @@ static enum pipe_error compile_vs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret = PIPE_OK; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_vertex_program( vs, key ); if (result == NULL) { @@ -80,8 +80,10 @@ static enum 
pipe_error compile_vs( struct svga_context *svga, } result->id = util_bitmask_add(svga->vs_bm); - if(result->id == UTIL_BITMASK_INVALID_INDEX) + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto fail; + } ret = SVGA3D_DefineShader(svga->swc, result->id, @@ -200,10 +202,12 @@ static int update_zero_stride( struct svga_context *svga, key.output_stride = 4 * sizeof(float); key.nr_elements = 1; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].input_format = vel->src_format; key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; key.element[0].input_offset = vel->src_offset; + key.element[0].instance_divisor = vel->instance_divisor; key.element[0].output_offset = const_idx * 4 * sizeof(float); translate_key_sanitize(&key); @@ -222,7 +226,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, vbuffer->stride); - translate->run(translate, 0, 1, + translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); pipe_buffer_unmap(svga->pipe.screen, diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 075e4f9a0b2..5a9f0fc6901 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -812,13 +812,13 @@ trace_context_set_clip_state(struct pipe_context *_pipe, static INLINE void trace_context_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_constant_buffer *buffer) + struct pipe_buffer *buffer) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; if (buffer) - trace_screen_user_buffer_update(_pipe->screen, buffer->buffer); + trace_screen_user_buffer_update(_pipe->screen, buffer); trace_dump_call_begin("pipe_context", "set_constant_buffer"); @@ -827,10 +827,11 @@ trace_context_set_constant_buffer(struct 
pipe_context *_pipe, trace_dump_arg(uint, index); trace_dump_arg(constant_buffer, buffer); + /* XXX hmm? */ if (buffer) { - struct pipe_constant_buffer _buffer; - _buffer.buffer = trace_buffer_unwrap(tr_ctx, buffer->buffer); - pipe->set_constant_buffer(pipe, shader, index, &_buffer); + struct pipe_buffer *_buffer; + _buffer = trace_buffer_unwrap(tr_ctx, buffer); + pipe->set_constant_buffer(pipe, shader, index, _buffer); } else { pipe->set_constant_buffer(pipe, shader, index, buffer); } diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 86237e03bcc..32f61f8c944 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -229,7 +229,7 @@ void trace_dump_clip_state(const struct pipe_clip_state *state) } -void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) +void trace_dump_constant_buffer(const struct pipe_buffer *state) { if (!trace_dumping_enabled_locked()) return; @@ -241,7 +241,7 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) trace_dump_struct_begin("pipe_constant_buffer"); - trace_dump_member(buffer_ptr, state, buffer); + trace_dump_reference(&state->reference); trace_dump_struct_end(); } diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 07ad6fbb205..c7860fd6e18 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -47,7 +47,7 @@ void trace_dump_scissor_state(const struct pipe_scissor_state *state); void trace_dump_clip_state(const struct pipe_clip_state *state); -void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); +void trace_dump_constant_buffer(const struct pipe_buffer *state); void trace_dump_token(const struct tgsi_token *token); diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 26a940593f0..272d0308cc1 100644 --- 
a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -104,7 +104,8 @@ typedef unsigned char boolean; /* Function visibility */ #ifndef PUBLIC -# if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +# if (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \ + || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PUBLIC __attribute__((visibility("default"))) # else # define PUBLIC @@ -139,22 +140,40 @@ typedef unsigned char boolean; -#if defined(__GNUC__) -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) -#define ALIGN16_ASSIGN(NAME) NAME##___aligned -#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) -#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) +/* Macros for data alignment. */ +#if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Type-Attributes.html */ +#define PIPE_ALIGN_TYPE(_alignment, _type) _type __attribute__((aligned(_alignment))) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */ +#define PIPE_ALIGN_VAR(_alignment) __attribute__((aligned(_alignment))) + #if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) -#define ALIGN_STACK __attribute__((force_align_arg_pointer)) +#define PIPE_ALIGN_STACK __attribute__((force_align_arg_pointer)) #else -#define ALIGN_STACK +#define PIPE_ALIGN_STACK #endif + +#elif defined(_MSC_VER) + +/* See http://msdn.microsoft.com/en-us/library/83ythb65.aspx */ +#define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type +#define PIPE_ALIGN_VAR(_alignment) __declspec(align(_alignment)) + +#define PIPE_ALIGN_STACK + +#elif defined(SWIG) + +#define PIPE_ALIGN_TYPE(_alignment, _type) _type +#define PIPE_ALIGN_VAR(_alignment) + +#define PIPE_ALIGN_STACK + #else -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] -#define 
ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned) -#define ALIGN16_ATTRIB -#define ALIGN8_ATTRIB -#define ALIGN_STACK + +#error "Unsupported compiler" + #endif diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index d2f8085b421..0b8f6da2f4a 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -69,6 +69,22 @@ struct pipe_context { unsigned indexSize, unsigned mode, unsigned start, unsigned count); + void (*draw_arrays_instanced)(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + void (*draw_elements_instanced)(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + /* XXX: this is (probably) a temporary entrypoint, as the range * information should be available from the vertex_buffer state. 
* Using this to quickly evaluate a specialized path in the draw @@ -170,7 +186,7 @@ struct pipe_context { void (*set_constant_buffer)( struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf ); + struct pipe_buffer *buf ); void (*set_framebuffer_state)( struct pipe_context *, const struct pipe_framebuffer_state * ); diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 550e2abc32a..b489b044667 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -121,17 +121,18 @@ struct tgsi_declaration_range unsigned Last : 16; /**< UINT */ }; -#define TGSI_SEMANTIC_POSITION 0 -#define TGSI_SEMANTIC_COLOR 1 -#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ -#define TGSI_SEMANTIC_FOG 3 -#define TGSI_SEMANTIC_PSIZE 4 -#define TGSI_SEMANTIC_GENERIC 5 -#define TGSI_SEMANTIC_NORMAL 6 -#define TGSI_SEMANTIC_FACE 7 -#define TGSI_SEMANTIC_EDGEFLAG 8 -#define TGSI_SEMANTIC_PRIMID 9 -#define TGSI_SEMANTIC_COUNT 10 /**< number of semantic values */ +#define TGSI_SEMANTIC_POSITION 0 +#define TGSI_SEMANTIC_COLOR 1 +#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ +#define TGSI_SEMANTIC_FOG 3 +#define TGSI_SEMANTIC_PSIZE 4 +#define TGSI_SEMANTIC_GENERIC 5 +#define TGSI_SEMANTIC_NORMAL 6 +#define TGSI_SEMANTIC_FACE 7 +#define TGSI_SEMANTIC_EDGEFLAG 8 +#define TGSI_SEMANTIC_PRIMID 9 +#define TGSI_SEMANTIC_INSTANCEID 10 +#define TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */ struct tgsi_declaration_semantic { diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 60e96b98deb..fdd29ed4492 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -66,10 +66,6 @@ extern "C" { #define PIPE_MAX_TEXTURE_LEVELS 16 -/* fwd decls */ -struct pipe_surface; - - /** * The driver will certainly subclass this to include actual memory * management information. 
@@ -178,15 +174,6 @@ struct pipe_clip_state }; -/** - * Constants for vertex/fragment shaders - */ -struct pipe_constant_buffer -{ - struct pipe_buffer *buffer; -}; - - struct pipe_shader_state { const struct tgsi_token *tokens; @@ -376,6 +363,11 @@ struct pipe_vertex_element /** Offset of this attribute, in bytes, from the start of the vertex */ unsigned src_offset; + /** Instance data rate divisor. 0 means this is per-vertex data, + * n means per-instance data used for n consecutive instances (n > 0). + */ + unsigned instance_divisor; + /** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does * this attribute live in? */ diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index f131e77ac5e..0fdfa96b35a 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -180,6 +180,7 @@ dri_get_buffers(__DRIdrawable * dPriv) switch (buffers[i].attachment) { case __DRI_BUFFER_FRONT_LEFT: + continue; case __DRI_BUFFER_FAKE_FRONT_LEFT: index = ST_SURFACE_FRONT_LEFT; format = drawable->color_format; @@ -372,6 +373,7 @@ dri_create_buffer(__DRIscreen * sPriv, /* TODO incase of double buffer visual, delay fake creation */ i = 0; drawable->attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + drawable->attachments[i++] = __DRI_BUFFER_FAKE_FRONT_LEFT; if (visual->doubleBufferMode) drawable->attachments[i++] = __DRI_BUFFER_BACK_LEFT; diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 793db087ee1..d8c054313b3 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -83,7 +83,7 @@ dri_fill_in_modes(struct dri_screen *screen, unsigned num_modes; uint8_t depth_bits_array[5]; uint8_t stencil_bits_array[5]; - uint8_t msaa_samples_array[1]; + uint8_t msaa_samples_array[2]; unsigned depth_buffer_factor; unsigned back_buffer_factor; unsigned msaa_samples_factor; @@ 
-147,8 +147,9 @@ dri_fill_in_modes(struct dri_screen *screen, } msaa_samples_array[0] = 0; + msaa_samples_array[1] = 4; back_buffer_factor = 3; - msaa_samples_factor = 1; + msaa_samples_factor = 2; num_modes = depth_buffer_factor * back_buffer_factor * msaa_samples_factor * 4; @@ -158,7 +159,7 @@ dri_fill_in_modes(struct dri_screen *screen, depth_bits_array, stencil_bits_array, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, msaa_samples_factor); } else { __DRIconfig **configs_a8r8g8b8 = NULL; __DRIconfig **configs_x8r8g8b8 = NULL; @@ -170,7 +171,8 @@ dri_fill_in_modes(struct dri_screen *screen, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, + msaa_samples_factor); if (pf_x8r8g8b8) configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV, depth_bits_array, @@ -178,7 +180,8 @@ dri_fill_in_modes(struct dri_screen *screen, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, + msaa_samples_factor); if (configs_a8r8g8b8 && configs_x8r8g8b8) configs = driConcatConfigs(configs_x8r8g8b8, configs_a8r8g8b8); diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c index 8b2e3b5f85b..042e9518c2d 100644 --- a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c @@ -24,7 +24,9 @@ #include <assert.h> #include <string.h> +#include "pipe/p_screen.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "egldriver.h" #include "eglcurrent.h" #include "eglconfigutil.h" @@ -43,8 +45,13 @@ egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx) struct egl_g3d_display *gdpy = egl_g3d_display(dpy); struct pipe_screen *screen = gdpy->native->screen; struct egl_g3d_context *gctx = egl_g3d_context(ctx); - EGLint num_surfaces; - EGLint s, i; + const uint 
st_att_map[NUM_NATIVE_ATTACHMENTS] = { + ST_SURFACE_FRONT_LEFT, + ST_SURFACE_BACK_LEFT, + ST_SURFACE_FRONT_RIGHT, + ST_SURFACE_BACK_RIGHT, + }; + EGLint num_surfaces, s; /* validate draw and/or read buffers */ num_surfaces = (gctx->base.ReadSurface == gctx->base.DrawSurface) ? 1 : 2; @@ -52,6 +59,7 @@ egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx) struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; struct egl_g3d_surface *gsurf; struct egl_g3d_buffer *gbuf; + EGLint att; if (s == 0) { gsurf = egl_g3d_surface(gctx->base.DrawSurface); @@ -63,35 +71,33 @@ egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx) } if (!gctx->force_validate) { - EGLint cur_w, cur_h; - - cur_w = gsurf->base.Width; - cur_h = gsurf->base.Height; - gsurf->native->validate(gsurf->native, - gbuf->native_atts, gbuf->num_atts, - NULL, - &gsurf->base.Width, &gsurf->base.Height); - /* validate only when the geometry changed */ - if (gsurf->base.Width == cur_w && gsurf->base.Height == cur_h) + unsigned int seq_num; + + gsurf->native->validate(gsurf->native, gbuf->attachment_mask, + &seq_num, NULL, NULL, NULL); + /* skip validation */ + if (gsurf->sequence_number == seq_num) continue; } - gsurf->native->validate(gsurf->native, - gbuf->native_atts, gbuf->num_atts, - (struct pipe_texture **) textures, + pipe_surface_reference(&gsurf->render_surface, NULL); + memset(textures, 0, sizeof(textures)); + + gsurf->native->validate(gsurf->native, gbuf->attachment_mask, + &gsurf->sequence_number, textures, &gsurf->base.Width, &gsurf->base.Height); - for (i = 0; i < gbuf->num_atts; i++) { - struct pipe_texture *pt = textures[i]; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + struct pipe_texture *pt = textures[att]; struct pipe_surface *ps; - if (pt) { + if (native_attachment_mask_test(gbuf->attachment_mask, att) && pt) { ps = screen->get_tex_surface(screen, pt, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE); gctx->stapi->st_set_framebuffer_surface(gbuf->st_fb, 
- gbuf->st_atts[i], ps); + st_att_map[att], ps); - if (gbuf->native_atts[i] == gsurf->render_att) + if (gsurf->render_att == att) pipe_surface_reference(&gsurf->render_surface, ps); pipe_surface_reference(&ps, NULL); @@ -130,13 +136,7 @@ static void egl_g3d_route_context(_EGLDisplay *dpy, _EGLContext *ctx) { struct egl_g3d_context *gctx = egl_g3d_context(ctx); - const uint st_att_map[NUM_NATIVE_ATTACHMENTS] = { - ST_SURFACE_FRONT_LEFT, - ST_SURFACE_BACK_LEFT, - ST_SURFACE_FRONT_RIGHT, - ST_SURFACE_BACK_RIGHT, - }; - EGLint s, i; + EGLint s; /* route draw and read buffers' attachments */ for (s = 0; s < 2; s++) { @@ -152,11 +152,7 @@ egl_g3d_route_context(_EGLDisplay *dpy, _EGLContext *ctx) gbuf = &gctx->read; } - gbuf->native_atts[0] = gsurf->render_att; - gbuf->num_atts = 1; - - for (i = 0; i < gbuf->num_atts; i++) - gbuf->st_atts[i] = st_att_map[gbuf->native_atts[i]]; + gbuf->attachment_mask = (1 << gsurf->render_att); /* FIXME OpenGL defaults to draw the front or back buffer when the * context is single-buffered or double-buffered respectively. In EGL, @@ -198,19 +194,19 @@ egl_g3d_realloc_context(_EGLDisplay *dpy, _EGLContext *ctx) if (!gdraw || priv != (void *) &gdraw->base) { gctx->stapi->st_unreference_framebuffer(gctx->draw.st_fb); gctx->draw.st_fb = NULL; - gctx->draw.num_atts = 0; + gctx->draw.attachment_mask = 0x0; } if (is_equal) { gctx->read.st_fb = NULL; - gctx->draw.num_atts = 0; + gctx->draw.attachment_mask = 0x0; } else { priv = gctx->stapi->st_framebuffer_private(gctx->read.st_fb); if (!gread || priv != (void *) &gread->base) { gctx->stapi->st_unreference_framebuffer(gctx->read.st_fb); gctx->read.st_fb = NULL; - gctx->draw.num_atts = 0; + gctx->draw.attachment_mask = 0x0; } } } @@ -459,16 +455,30 @@ egl_g3d_add_configs(_EGLDriver *drv, _EGLDisplay *dpy, EGLint id) * Flush the front buffer of the context's draw surface. 
*/ static void -egl_g3d_flush_frontbuffer(void *dummy, struct pipe_surface *surf, - void *context_private) +egl_g3d_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_surface *surf, void *context_private) { struct egl_g3d_context *gctx = egl_g3d_context(context_private); struct egl_g3d_surface *gsurf = egl_g3d_surface(gctx->base.DrawSurface); - if (gsurf) { + if (gsurf) gsurf->native->flush_frontbuffer(gsurf->native); - egl_g3d_validate_context(gctx->base.Display, &gctx->base); - } +} + +/** + * Re-validate the context. + */ +static void +egl_g3d_update_buffer(struct pipe_screen *screen, void *context_private) +{ + struct egl_g3d_context *gctx = egl_g3d_context(context_private); + + /** + * It is likely that the surface has changed when this function is called. + * Set force_validate to skip an unnecessary check. + */ + gctx->force_validate = EGL_TRUE; + egl_g3d_validate_context(gctx->base.Display, &gctx->base); } static EGLBoolean @@ -512,13 +522,15 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, } dpy->DriverData = gdpy; - gdpy->native = - native_create_display(dpy->NativeDisplay, egl_g3d_flush_frontbuffer); + gdpy->native = native_create_display(dpy->NativeDisplay); if (!gdpy->native) { _eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)"); goto fail; } + gdpy->native->screen->flush_frontbuffer = egl_g3d_flush_frontbuffer; + gdpy->native->screen->update_buffer = egl_g3d_update_buffer; + dpy->ClientAPIsMask = gdrv->api_mask; if (egl_g3d_add_configs(drv, dpy, 1) == 1) { @@ -550,7 +562,7 @@ egl_g3d_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, _EGLContext *share, const EGLint *attribs) { struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - struct egl_g3d_context *xshare = egl_g3d_context(share); + struct egl_g3d_context *gshare = egl_g3d_context(share); struct egl_g3d_config *gconf = egl_g3d_config(conf); struct egl_g3d_context *gctx; const __GLcontextModes *mode; @@ -575,8 +587,18 @@ 
egl_g3d_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, mode = &gconf->native->mode; gctx->pipe = gdpy->native->create_context(gdpy->native, (void *) &gctx->base); + if (!gctx->pipe) { + free(gctx); + return NULL; + } + gctx->st_ctx = gctx->stapi->st_create_context(gctx->pipe, mode, - (xshare) ? xshare->st_ctx : NULL); + (gshare) ? gshare->st_ctx : NULL); + if (!gctx->st_ctx) { + gctx->pipe->destroy(gctx->pipe); + free(gctx); + return NULL; + } return &gctx->base; } @@ -599,6 +621,16 @@ egl_g3d_destroy_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) return EGL_TRUE; } +static EGLBoolean +init_surface_geometry(_EGLSurface *surf) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + + return gsurf->native->validate(gsurf->native, 0x0, + &gsurf->sequence_number, NULL, + &gsurf->base.Width, &gsurf->base.Height); +} + static _EGLSurface * egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, EGLNativeWindowType win, @@ -626,8 +658,7 @@ egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, return NULL; } - if (!gsurf->native->validate(gsurf->native, NULL, 0, NULL, - &gsurf->base.Width, &gsurf->base.Height)) { + if (!init_surface_geometry(&gsurf->base)) { gsurf->native->destroy(gsurf->native); free(gsurf); return NULL; @@ -667,8 +698,7 @@ egl_g3d_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *dpy, return NULL; } - if (!gsurf->native->validate(gsurf->native, NULL, 0, NULL, - &gsurf->base.Width, &gsurf->base.Height)) { + if (!init_surface_geometry(&gsurf->base)) { gsurf->native->destroy(gsurf->native); free(gsurf); return NULL; @@ -706,6 +736,12 @@ egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy, return NULL; } + if (!init_surface_geometry(&gsurf->base)) { + gsurf->native->destroy(gsurf->native); + free(gsurf); + return NULL; + } + gsurf->render_att = (!gconf->native->mode.doubleBufferMode) ? 
NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; @@ -817,14 +853,110 @@ egl_g3d_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf) struct egl_g3d_config *gconf = egl_g3d_config(gsurf->base.Config); /* force validation if the swap method is not copy */ - if (gconf->native->mode.swapMethod != GLX_SWAP_COPY_OML) + if (gconf->native->mode.swapMethod != GLX_SWAP_COPY_OML) { gctx->force_validate = EGL_TRUE; - egl_g3d_validate_context(dpy, &gctx->base); + egl_g3d_validate_context(dpy, &gctx->base); + } } return EGL_TRUE; } +/** + * Find a config that supports the pixmap. + */ +static _EGLConfig * +find_pixmap_config(_EGLDisplay *dpy, EGLNativePixmapType pix) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf; + EGLint i; + + for (i = 0; i < dpy->NumConfigs; i++) { + gconf = egl_g3d_config(dpy->Configs[i]); + if (gdpy->native->is_pixmap_supported(gdpy->native, pix, gconf->native)) + break; + } + + return (i < dpy->NumConfigs) ? &gconf->base : NULL; +} + +/** + * Get the pipe surface of the given attachment of the native surface. 
+ */ +static struct pipe_surface * +get_pipe_surface(struct native_display *ndpy, struct native_surface *nsurf, + enum native_attachment natt) +{ + struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; + struct pipe_surface *psurf; + + textures[natt] = NULL; + nsurf->validate(nsurf, 1 << natt, NULL, textures, NULL, NULL); + if (!textures[natt]) + return NULL; + + psurf = ndpy->screen->get_tex_surface(ndpy->screen, textures[natt], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_texture_reference(&textures[natt], NULL); + + return psurf; +} + +static EGLBoolean +egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, + NativePixmapType target) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + _EGLContext *ctx = _eglGetCurrentContext(); + struct egl_g3d_config *gconf; + struct native_surface *nsurf; + struct pipe_screen *screen = gdpy->native->screen; + struct pipe_surface *psurf; + + if (!gsurf->render_surface) + return EGL_TRUE; + + gconf = egl_g3d_config(find_pixmap_config(dpy, target)); + if (!gconf) + return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers"); + + nsurf = gdpy->native->create_pixmap_surface(gdpy->native, + target, gconf->native); + if (!nsurf) + return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers"); + + /* flush if the surface is current */ + if (ctx && ctx->DrawSurface == &gsurf->base) { + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + gctx->stapi->st_flush(gctx->st_ctx, + PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); + } + + psurf = get_pipe_surface(gdpy->native, nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); + if (psurf) { + struct pipe_context pipe; + + /** + * XXX This is hacky. If we might allow the EGLDisplay to create a pipe + * context of its own and use the blitter context for this. 
+ */ + memset(&pipe, 0, sizeof(pipe)); + pipe.screen = screen; + + util_surface_copy(&pipe, FALSE, psurf, 0, 0, + gsurf->render_surface, 0, 0, psurf->width, psurf->height); + + pipe_surface_reference(&psurf, NULL); + nsurf->flush_frontbuffer(nsurf); + } + + nsurf->destroy(nsurf); + + return EGL_TRUE; +} + static EGLBoolean egl_g3d_wait_client(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) { @@ -849,15 +981,14 @@ egl_g3d_wait_native(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine) } static _EGLProc -egl_g3d_get_proc_address(const char *procname) +egl_g3d_get_proc_address(_EGLDriver *drv, const char *procname) { - /* FIXME how come _EGLDriver is not passed? */ - const struct egl_g3d_st *stapi; + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); _EGLProc proc; EGLint i; for (i = 0; i < NUM_EGL_G3D_STS; i++) { - stapi = egl_g3d_get_st(i); + const struct egl_g3d_st *stapi = gdrv->stapis[i]; if (stapi) { proc = (_EGLProc) stapi->st_get_proc_address(procname); if (proc) @@ -1079,6 +1210,7 @@ _eglMain(const char *args) gdrv->base.API.DestroySurface = egl_g3d_destroy_surface; gdrv->base.API.MakeCurrent = egl_g3d_make_current; gdrv->base.API.SwapBuffers = egl_g3d_swap_buffers; + gdrv->base.API.CopyBuffers = egl_g3d_copy_buffers; gdrv->base.API.WaitClient = egl_g3d_wait_client; gdrv->base.API.WaitNative = egl_g3d_wait_native; gdrv->base.API.GetProcAddress = egl_g3d_get_proc_address; diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h index 33894b614f9..1da8af495b1 100644 --- a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h +++ b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h @@ -52,9 +52,7 @@ struct egl_g3d_display { struct egl_g3d_buffer { struct st_framebuffer *st_fb; - EGLint num_atts; - enum native_attachment native_atts[NUM_NATIVE_ATTACHMENTS]; - uint st_atts[NUM_NATIVE_ATTACHMENTS]; + uint attachment_mask; }; struct egl_g3d_context { @@ -73,6 +71,7 @@ struct egl_g3d_surface { struct 
native_surface *native; enum native_attachment render_att; struct pipe_surface *render_surface; + unsigned int sequence_number; }; struct egl_g3d_config { diff --git a/src/gallium/state_trackers/egl_g3d/common/native.h b/src/gallium/state_trackers/egl_g3d/common/native.h index 88d87c6f3ae..f374f2e4a68 100644 --- a/src/gallium/state_trackers/egl_g3d/common/native.h +++ b/src/gallium/state_trackers/egl_g3d/common/native.h @@ -64,13 +64,19 @@ struct native_surface { boolean (*flush_frontbuffer)(struct native_surface *nsurf); /** - * Validate the buffers of the surface. Those not listed in the attachments - * will be destroyed. The returned textures are owned by the caller. + * Validate the buffers of the surface. textures, if not NULL, points to an + * array of size NUM_NATIVE_ATTACHMENTS and the returned textures are owned + * by the caller. A sequence number is also returned. The caller can use + * it to check if anything has changed since the last call. Any of the + * pointers may be NULL and it indicates the caller has no interest in those + * values. + * + * If this function is called multiple times with different attachment + * masks, those not listed in the latest call might be destroyed. This + * behavior might change in the future. */ - boolean (*validate)(struct native_surface *nsurf, - const enum native_attachment *natts, - unsigned num_natts, - struct pipe_texture **textures, + boolean (*validate)(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, int *width, int *height); /** @@ -80,6 +86,7 @@ struct native_surface { }; struct native_config { + /* __GLcontextModes should go away some day */ __GLcontextModes mode; enum pipe_format color_format; enum pipe_format depth_format; @@ -107,7 +114,14 @@ struct native_display_modeset; * the native display server. */ struct native_display { + /** + * The pipe screen of the native display. 
+ * + * Note that the "flush_frontbuffer" and "update_buffer" callbacks will be + * overridden. + */ struct pipe_screen *screen; + void (*destroy)(struct native_display *ndpy); /** @@ -122,6 +136,17 @@ struct native_display { int *num_configs); /** + * Test if a pixmap is supported by the given config. Required unless no + * config has GLX_PIXMAP_BIT set. + * + * This function is usually called to find a config that supports a given + * pixmap. Thus, it is usually called with the same pixmap in a row. + */ + boolean (*is_pixmap_supported)(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf); + + /** * Create a pipe context. */ struct pipe_context *(*create_context)(struct native_display *ndpy, @@ -197,15 +222,19 @@ struct native_display_modeset { const struct native_mode *nmode); }; -typedef void (*native_flush_frontbuffer)(void *dummy, - struct pipe_surface *surf, - void *context_private); +/** + * Test whether an attachment is set in the mask. + */ +static INLINE boolean +native_attachment_mask_test(uint mask, enum native_attachment att) +{ + return !!(mask & (1 << att)); +} const char * native_get_name(void); struct native_display * -native_create_display(EGLNativeDisplayType dpy, - native_flush_frontbuffer flush_frontbuffer); +native_create_display(EGLNativeDisplayType dpy); #endif /* _NATIVE_H_ */ diff --git a/src/gallium/state_trackers/egl_g3d/kms/native_kms.c b/src/gallium/state_trackers/egl_g3d/kms/native_kms.c index 0e0babdb14f..dc664366305 100644 --- a/src/gallium/state_trackers/egl_g3d/kms/native_kms.c +++ b/src/gallium/state_trackers/egl_g3d/kms/native_kms.c @@ -33,22 +33,17 @@ #include "native_kms.h" static boolean -kms_surface_validate(struct native_surface *nsurf, - const enum native_attachment *natts, - unsigned num_natts, - struct pipe_texture **textures, +kms_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, int *width, int *height) 
{ struct kms_surface *ksurf = kms_surface(nsurf); struct kms_display *kdpy = ksurf->kdpy; struct pipe_screen *screen = kdpy->base.screen; struct pipe_texture templ, *ptex; - int i; - - if (num_natts) { - if (textures) - memset(textures, 0, sizeof(*textures) * num_natts); + int att; + if (attachment_mask) { memset(&templ, 0, sizeof(templ)); templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; @@ -62,19 +57,25 @@ kms_surface_validate(struct native_surface *nsurf, } /* create textures */ - for (i = 0; i < num_natts; i++) { - enum native_attachment natt = natts[i]; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + /* delay the allocation */ + if (!native_attachment_mask_test(attachment_mask, att)) + continue; - ptex = ksurf->textures[natt]; + ptex = ksurf->textures[att]; if (!ptex) { ptex = screen->texture_create(screen, &templ); - ksurf->textures[natt] = ptex; + ksurf->textures[att] = ptex; } - if (textures) - pipe_texture_reference(&textures[i], ptex); + if (textures) { + textures[att] = NULL; + pipe_texture_reference(&textures[att], ptex); + } } + if (seq_num) + *seq_num = ksurf->sequence_number; if (width) *width = ksurf->width; if (height) @@ -111,7 +112,7 @@ kms_surface_init_framebuffers(struct native_surface *nsurf, boolean need_back) if (!fb->texture) { /* make sure the texture has been allocated */ - kms_surface_validate(&ksurf->base, &natt, 1, NULL, NULL, NULL); + kms_surface_validate(&ksurf->base, 1 << natt, NULL, NULL, NULL, NULL); if (!ksurf->textures[natt]) return FALSE; @@ -196,6 +197,9 @@ kms_surface_swap_buffers(struct native_surface *nsurf) ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT]; ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT] = tmp_texture; + /* the front/back textures are swapped */ + ksurf->sequence_number++; + return TRUE; } @@ -764,8 +768,7 @@ static struct native_display_modeset kms_display_modeset = { }; static struct native_display * -kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api, - native_flush_frontbuffer 
flush_frontbuffer) +kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api) { struct kms_display *kdpy; @@ -807,10 +810,6 @@ kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api, return NULL; } - kdpy->base.screen->flush_frontbuffer = - (void (*)(struct pipe_screen *, struct pipe_surface *, void *)) - flush_frontbuffer; - kdpy->base.destroy = kms_display_destroy; kdpy->base.get_configs = kms_display_get_configs; kdpy->base.create_context = kms_display_create_context; @@ -821,13 +820,6 @@ kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api, return &kdpy->base; } -static void -dummy_flush_frontbuffer(void *dummy, struct pipe_surface *surf, - void *context_private) -{ - _eglLog(_EGL_WARNING, "flush_frontbuffer is not supplied"); -} - /* the api is destroyed with the native display */ static struct drm_api *drm_api; @@ -848,19 +840,15 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - native_flush_frontbuffer flush_frontbuffer) +native_create_display(EGLNativeDisplayType dpy) { struct native_display *ndpy = NULL; if (!drm_api) drm_api = drm_api_create(); - if (!flush_frontbuffer) - flush_frontbuffer = dummy_flush_frontbuffer; - if (drm_api) - ndpy = kms_create_display(dpy, drm_api, flush_frontbuffer); + ndpy = kms_create_display(dpy, drm_api); return ndpy; } diff --git a/src/gallium/state_trackers/egl_g3d/kms/native_kms.h b/src/gallium/state_trackers/egl_g3d/kms/native_kms.h index 3f869b25acf..095186e3cf3 100644 --- a/src/gallium/state_trackers/egl_g3d/kms/native_kms.h +++ b/src/gallium/state_trackers/egl_g3d/kms/native_kms.h @@ -81,6 +81,7 @@ struct kms_surface { int width, height; struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; + unsigned int sequence_number; struct kms_framebuffer front_fb, back_fb; boolean is_shown; diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c b/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c index 0dda786bbdb..07f82d878c5 100644 
--- a/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c +++ b/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c @@ -64,6 +64,7 @@ struct dri2_surface { struct pipe_texture *pbuffer_textures[NUM_NATIVE_ATTACHMENTS]; boolean have_back, have_fake; int width, height; + unsigned int sequence_number; }; struct dri2_config { @@ -133,21 +134,18 @@ dri2_surface_swap_buffers(struct native_surface *nsurf) } static boolean -dri2_surface_validate(struct native_surface *nsurf, - const enum native_attachment *natts, - unsigned num_natts, - struct pipe_texture **textures, - int *width, int *height) +dri2_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, + int *width, int *height) { struct dri2_surface *dri2surf = dri2_surface(nsurf); struct dri2_display *dri2dpy = dri2surf->dri2dpy; unsigned int dri2atts[NUM_NATIVE_ATTACHMENTS]; - EGLint texture_indices[NUM_NATIVE_ATTACHMENTS]; struct pipe_texture templ; struct x11_drawable_buffer *xbufs; - int num_ins, num_outs, i; + int num_ins, num_outs, att, i; - if (num_natts) { + if (attachment_mask) { memset(&templ, 0, sizeof(templ)); templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; @@ -158,26 +156,31 @@ dri2_surface_validate(struct native_surface *nsurf, templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; if (textures) - memset(textures, 0, sizeof(*textures) * num_natts); + memset(textures, 0, sizeof(*textures) * NUM_NATIVE_ATTACHMENTS); } /* create textures for pbuffer */ if (dri2surf->type == DRI2_SURFACE_TYPE_PBUFFER) { struct pipe_screen *screen = dri2dpy->base.screen; - for (i = 0; i < num_natts; i++) { - enum native_attachment natt = natts[i]; - struct pipe_texture *ptex = dri2surf->pbuffer_textures[natt]; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + struct pipe_texture *ptex = dri2surf->pbuffer_textures[att]; + + /* delay the allocation */ + if (!native_attachment_mask_test(attachment_mask, att)) + continue; if (!ptex) { ptex = 
screen->texture_create(screen, &templ); - dri2surf->pbuffer_textures[natt] = ptex; + dri2surf->pbuffer_textures[att] = ptex; } if (textures) - pipe_texture_reference(&textures[i], ptex); + pipe_texture_reference(&textures[att], ptex); } + if (seq_num) + *seq_num = dri2surf->sequence_number; if (width) *width = dri2surf->width; if (height) @@ -186,48 +189,56 @@ dri2_surface_validate(struct native_surface *nsurf, return TRUE; } - for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) - texture_indices[i] = -1; - /* prepare the attachments */ - num_ins = num_natts; - for (i = 0; i < num_natts; i++) { - unsigned int dri2att; + num_ins = 0; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + if (native_attachment_mask_test(attachment_mask, att)) { + unsigned int dri2att; + + switch (att) { + case NATIVE_ATTACHMENT_FRONT_LEFT: + dri2att = DRI2BufferFrontLeft; + break; + case NATIVE_ATTACHMENT_BACK_LEFT: + dri2att = DRI2BufferBackLeft; + break; + case NATIVE_ATTACHMENT_FRONT_RIGHT: + dri2att = DRI2BufferFrontRight; + break; + case NATIVE_ATTACHMENT_BACK_RIGHT: + dri2att = DRI2BufferBackRight; + break; + default: + assert(0); + dri2att = 0; + break; + } - switch (natts[i]) { - case NATIVE_ATTACHMENT_FRONT_LEFT: - dri2att = DRI2BufferFrontLeft; - break; - case NATIVE_ATTACHMENT_BACK_LEFT: - dri2att = DRI2BufferBackLeft; - break; - case NATIVE_ATTACHMENT_FRONT_RIGHT: - dri2att = DRI2BufferFrontRight; - break; - case NATIVE_ATTACHMENT_BACK_RIGHT: - dri2att = DRI2BufferBackRight; - break; - default: - assert(0); - dri2att = 0; - break; + dri2atts[num_ins] = dri2att; + num_ins++; } - dri2atts[i] = dri2att; - texture_indices[natts[i]] = i; } dri2surf->have_back = FALSE; dri2surf->have_fake = FALSE; + /* remember old geometry */ + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + xbufs = x11_drawable_get_buffers(dri2dpy->xscr, dri2surf->drawable, &dri2surf->width, &dri2surf->height, dri2atts, FALSE, num_ins, &num_outs); if (!xbufs) return FALSE; - /* update width 
and height */ - templ.width0 = dri2surf->width; - templ.height0 = dri2surf->height; + if (templ.width0 != dri2surf->width || templ.height0 != dri2surf->height) { + /* are there cases where the buffers change and the geometry doesn't? */ + dri2surf->sequence_number++; + + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + } for (i = 0; i < num_outs; i++) { struct x11_drawable_buffer *xbuf = &xbufs[i]; @@ -254,13 +265,13 @@ dri2_surface_validate(struct native_surface *nsurf, break; } - if (!desc || texture_indices[natt] < 0 || - (textures && textures[texture_indices[natt]])) { + if (!desc || !native_attachment_mask_test(attachment_mask, natt) || + (textures && textures[natt])) { if (!desc) _eglLog(_EGL_WARNING, "unknown buffer %d", xbuf->attachment); - else if (texture_indices[natt] < 0) + else if (!native_attachment_mask_test(attachment_mask, natt)) _eglLog(_EGL_WARNING, "unexpected buffer %d", xbuf->attachment); - else if (textures && textures[texture_indices[natt]]) + else _eglLog(_EGL_WARNING, "both real and fake front buffers are listed"); continue; } @@ -272,13 +283,15 @@ dri2_surface_validate(struct native_surface *nsurf, desc, xbuf->pitch, xbuf->name); if (ptex) { /* the caller owns the textures */ - textures[texture_indices[natt]] = ptex; + textures[natt] = ptex; } } } free(xbufs); + if (seq_num) + *seq_num = dri2surf->sequence_number; if (width) *width = dri2surf->width; if (height) @@ -575,6 +588,21 @@ dri2_display_get_configs(struct native_display *ndpy, int *num_configs) return configs; } +static boolean +dri2_display_is_pixmap_supported(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + uint depth, nconf_depth; + + depth = x11_drawable_get_depth(dri2dpy->xscr, (Drawable) pix); + nconf_depth = util_format_get_blocksizebits(nconf->color_format); + + /* simple depth match for now */ + return (depth == nconf_depth || (depth == 24 && depth + 
8 == nconf_depth)); +} + static void dri2_display_destroy(struct native_display *ndpy) { @@ -627,18 +655,8 @@ dri2_display_init_screen(struct native_display *ndpy) return TRUE; } -static void -dri2_display_flush_frontbuffer(void *dummy, struct pipe_surface *surf, - void *context_private) -{ - /* TODO get native surface from context private, and remove the callback */ - _eglLog(_EGL_WARNING, "flush_frontbuffer is not supplied"); -} - struct native_display * -x11_create_dri2_display(EGLNativeDisplayType dpy, - struct drm_api *api, - native_flush_frontbuffer flush_frontbuffer) +x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api) { struct dri2_display *dri2dpy; @@ -675,15 +693,9 @@ x11_create_dri2_display(EGLNativeDisplayType dpy, return NULL; } - if (!flush_frontbuffer) - flush_frontbuffer = dri2_display_flush_frontbuffer; - - dri2dpy->base.screen->flush_frontbuffer = - (void (*)(struct pipe_screen *, struct pipe_surface *, void *)) - flush_frontbuffer; - dri2dpy->base.destroy = dri2_display_destroy; dri2dpy->base.get_configs = dri2_display_get_configs; + dri2dpy->base.is_pixmap_supported = dri2_display_is_pixmap_supported; dri2dpy->base.create_context = dri2_display_create_context; dri2dpy->base.create_window_surface = dri2_display_create_window_surface; dri2dpy->base.create_pixmap_surface = dri2_display_create_pixmap_surface; diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_x11.c b/src/gallium/state_trackers/egl_g3d/x11/native_x11.c index a4f36e9deca..583ce3d3293 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/native_x11.c +++ b/src/gallium/state_trackers/egl_g3d/x11/native_x11.c @@ -48,8 +48,7 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - native_flush_frontbuffer flush_frontbuffer) +native_create_display(EGLNativeDisplayType dpy) { struct native_display *ndpy = NULL; boolean force_sw; @@ -59,14 +58,14 @@ native_create_display(EGLNativeDisplayType dpy, force_sw = 
debug_get_bool_option("EGL_SOFTWARE", FALSE); if (api && !force_sw) { - ndpy = x11_create_dri2_display(dpy, api, flush_frontbuffer); + ndpy = x11_create_dri2_display(dpy, api); } if (!ndpy) { EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING; _eglLog(level, "use software fallback"); - ndpy = x11_create_ximage_display(dpy, TRUE, flush_frontbuffer); + ndpy = x11_create_ximage_display(dpy, TRUE); } return ndpy; diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_x11.h b/src/gallium/state_trackers/egl_g3d/x11/native_x11.h index 9217eb62529..622ddac5df6 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/native_x11.h +++ b/src/gallium/state_trackers/egl_g3d/x11/native_x11.h @@ -29,13 +29,9 @@ #include "common/native.h" struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, - boolean use_xshm, - native_flush_frontbuffer flush_frontbuffer); +x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm); struct native_display * -x11_create_dri2_display(EGLNativeDisplayType dpy, - struct drm_api *api, - native_flush_frontbuffer flush_frontbuffer); +x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api); #endif /* _NATIVE_X11_H_ */ diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c b/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c index e02faa9b7ba..d76107c47f1 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c +++ b/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c @@ -83,6 +83,7 @@ struct ximage_surface { GC gc; struct ximage_buffer buffers[NUM_NATIVE_ATTACHMENTS]; + unsigned int sequence_number; }; struct ximage_config { @@ -260,6 +261,9 @@ ximage_surface_swap_buffers(struct native_surface *nsurf) *xfront = *xback; *xback = xtmp; + /* the front/back textures are swapped */ + xsurf->sequence_number++; + return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); } @@ -285,33 +289,29 @@ ximage_surface_update_geometry(struct native_surface *nsurf) } static 
boolean -ximage_surface_validate(struct native_surface *nsurf, - const enum native_attachment *natts, - unsigned num_natts, - struct pipe_texture **textures, - int *width, int *height) +ximage_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, + int *width, int *height) { struct ximage_surface *xsurf = ximage_surface(nsurf); - boolean error = FALSE; - unsigned i; + boolean new_buffers = FALSE; + int att; ximage_surface_update_geometry(&xsurf->base); - if (textures) - memset(textures, 0, sizeof(*textures) * num_natts); - - for (i = 0; i < num_natts; i++) { - enum native_attachment natt = natts[i]; - struct ximage_buffer *xbuf = &xsurf->buffers[natt]; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + struct ximage_buffer *xbuf = &xsurf->buffers[att]; - if (!xbuf) + /* delay the allocation */ + if (!native_attachment_mask_test(attachment_mask, att)) continue; /* reallocate the texture */ if (!xbuf->texture || xsurf->width != xbuf->texture->width0 || xsurf->height != xbuf->texture->height0) { - if (ximage_surface_alloc_buffer(&xsurf->base, natt)) { + new_buffers = TRUE; + if (ximage_surface_alloc_buffer(&xsurf->base, att)) { /* update ximage */ if (xbuf->ximage) { xbuf->ximage->width = xbuf->transfer->width; @@ -321,27 +321,24 @@ ximage_surface_validate(struct native_surface *nsurf, } } - /* allocation failed */ - if (!xbuf->texture) { - unsigned j; - for (j = 0; j < i; j++) - pipe_texture_reference(&textures[j], NULL); - for (j = i; j < num_natts; j++) - textures[j] = NULL; - error = TRUE; - break; + if (textures) { + textures[att] = NULL; + pipe_texture_reference(&textures[att], xbuf->texture); } - - if (textures) - pipe_texture_reference(&textures[i], xbuf->texture); } + /* increase the sequence number so that caller knows */ + if (new_buffers) + xsurf->sequence_number++; + + if (seq_num) + *seq_num = xsurf->sequence_number; if (width) *width = xsurf->width; if (height) *height = 
xsurf->height; - return !error; + return TRUE; } static void @@ -603,6 +600,34 @@ ximage_display_get_configs(struct native_display *ndpy, int *num_configs) return configs; } +static boolean +ximage_display_is_pixmap_supported(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + enum pipe_format fmt; + uint depth; + + depth = x11_drawable_get_depth(xdpy->xscr, (Drawable) pix); + switch (depth) { + case 32: + fmt = PIPE_FORMAT_A8R8G8B8_UNORM; + break; + case 24: + fmt = PIPE_FORMAT_X8R8G8B8_UNORM; + break; + case 16: + fmt = PIPE_FORMAT_R5G6B5_UNORM; + break; + default: + fmt = PIPE_FORMAT_NONE; + break; + } + + return (fmt == nconf->color_format); +} + static void ximage_display_destroy(struct native_display *ndpy) { @@ -620,18 +645,8 @@ ximage_display_destroy(struct native_display *ndpy) free(xdpy); } -static void -ximage_display_flush_frontbuffer(void *dummy, struct pipe_surface *surf, - void *context_private) -{ - /* TODO get native surface from context private, and remove the callback */ - _eglLog(_EGL_WARNING, "flush_frontbuffer is not supplied"); -} - struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, - boolean use_xshm, - native_flush_frontbuffer flush_frontbuffer) +x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm) { struct ximage_display *xdpy; @@ -660,17 +675,12 @@ x11_create_ximage_display(EGLNativeDisplayType dpy, (use_xshm && x11_screen_support(xdpy->xscr, X11_SCREEN_EXTENSION_XSHM)); xdpy->winsys = create_sw_winsys(); - if (!flush_frontbuffer) - flush_frontbuffer = ximage_display_flush_frontbuffer; - xdpy->winsys->flush_frontbuffer = - (void (*)(struct pipe_winsys *, struct pipe_surface *, void *)) - flush_frontbuffer; - xdpy->base.screen = softpipe_create_screen(xdpy->winsys); xdpy->base.destroy = ximage_display_destroy; xdpy->base.get_configs = ximage_display_get_configs; + xdpy->base.is_pixmap_supported = 
ximage_display_is_pixmap_supported; xdpy->base.create_context = ximage_display_create_context; xdpy->base.create_window_surface = ximage_display_create_window_surface; xdpy->base.create_pixmap_surface = ximage_display_create_pixmap_surface; diff --git a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c index 1e98943242a..4d68a88d2e6 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c +++ b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c @@ -30,6 +30,7 @@ #include <X11/extensions/XShm.h> #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_format.h" #include "xf86drm.h" #include "egllog.h" @@ -50,6 +51,10 @@ struct x11_screen { XVisualInfo *visuals; int num_visuals; + + /* cached values for x11_drawable_get_depth */ + Drawable last_drawable; + unsigned int last_depth; }; @@ -351,6 +356,37 @@ x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, } /** + * Return the depth of a drawable. + * + * Unlike other drawable functions, the drawable needs not be a DRI2 drawable. + */ +uint +x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable) +{ + unsigned int depth; + + if (drawable != xscr->last_drawable) { + Window root; + int x, y; + unsigned int w, h, border; + Status ok; + + ok = XGetGeometry(xscr->dpy, drawable, &root, + &x, &y, &w, &h, &border, &depth); + if (!ok) + depth = 0; + + xscr->last_drawable = drawable; + xscr->last_depth = depth; + } + else { + depth = xscr->last_depth; + } + + return depth; +} + +/** * Create a mode list of the given size. 
*/ __GLcontextModes * diff --git a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h index 86e8e0501a3..bf482189053 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h +++ b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h @@ -96,4 +96,7 @@ x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, int *width, int *height, unsigned int *attachments, boolean with_format, int num_ins, int *num_outs); +uint +x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable); + #endif /* _X11_SCREEN_H_ */ diff --git a/src/gallium/state_trackers/glx/xlib/glx_getproc.c b/src/gallium/state_trackers/glx/xlib/glx_getproc.c index 84d47b12edc..bd4a85caa04 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_getproc.c +++ b/src/gallium/state_trackers/glx/xlib/glx_getproc.c @@ -193,7 +193,7 @@ _glxapi_get_proc_address(const char *funcName) } -__GLXextFuncPtr +PUBLIC __GLXextFuncPtr glXGetProcAddressARB(const GLubyte *procName) { __GLXextFuncPtr f; diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 96b13c2258e..68797228459 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -76,7 +76,6 @@ %rename(BlendColor) pipe_blend_color; %rename(Blend) pipe_blend_state; %rename(Clip) pipe_clip_state; -%rename(ConstantBuffer) pipe_constant_buffer; %rename(Depth) pipe_depth_state; %rename(Stencil) pipe_stencil_state; %rename(Alpha) pipe_alpha_state; diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index 84ce1a41e6d..ce893dad453 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -142,10 +142,7 @@ struct st_context { void set_constant_buffer(unsigned shader, unsigned index, struct pipe_buffer *buffer ) { - struct pipe_constant_buffer state; - memset(&state, 0, sizeof(state)); - 
state.buffer = buffer; - $self->pipe->set_constant_buffer($self->pipe, shader, index, &state); + $self->pipe->set_constant_buffer($self->pipe, shader, index, buffer); } void set_framebuffer(const struct pipe_framebuffer_state *state ) diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py index a68709f5cf3..b61d47d6456 100755 --- a/src/gallium/state_trackers/python/retrace/interpreter.py +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -94,7 +94,7 @@ struct_factories = { "pipe_blend_color": gallium.BlendColor, "pipe_blend_state": gallium.Blend, #"pipe_clip_state": gallium.Clip, - #"pipe_constant_buffer": gallium.ConstantBuffer, + #"pipe_buffer": gallium.Buffer, "pipe_depth_state": gallium.Depth, "pipe_stencil_state": gallium.Stencil, "pipe_alpha_state": gallium.Alpha, @@ -462,10 +462,10 @@ class Context(Object): sys.stdout.flush() def set_constant_buffer(self, shader, index, buffer): - if buffer is not None and buffer.buffer is not None: - self.real.set_constant_buffer(shader, index, buffer.buffer) + if buffer is not None: + self.real.set_constant_buffer(shader, index, buffer) - self.dump_constant_buffer(buffer.buffer) + self.dump_constant_buffer(buffer) def set_framebuffer_state(self, state): _state = gallium.Framebuffer() diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index a3294e877a6..dfe3e465f77 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -35,225 +35,10 @@ * @author Jose Fonseca */ - -#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */ -#include "pipe/p_format.h" -#include "pipe/p_context.h" -#include "pipe/p_inlines.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "st_winsys.h" - -struct st_softpipe_buffer -{ - 
struct pipe_buffer base; - boolean userBuffer; /** Is this a user-space buffer? */ - void *data; - void *mapped; -}; - - -/** Cast wrapper */ -static INLINE struct st_softpipe_buffer * -st_softpipe_buffer( struct pipe_buffer *buf ) -{ - return (struct st_softpipe_buffer *)buf; -} - - -static void * -st_softpipe_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags) -{ - struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); - st_softpipe_buf->mapped = st_softpipe_buf->data; - return st_softpipe_buf->mapped; -} - - -static void -st_softpipe_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); - st_softpipe_buf->mapped = NULL; -} - - -static void -st_softpipe_buffer_destroy(struct pipe_buffer *buf) -{ - struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf); - - if (oldBuf->data) { - if (!oldBuf->userBuffer) - align_free(oldBuf->data); - - oldBuf->data = NULL; - } - - FREE(oldBuf); -} - - -static void -st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, - struct pipe_surface *surf, - void *context_private) -{ -} - - - -static const char * -st_softpipe_get_name(struct pipe_winsys *winsys) -{ - return "softpipe"; -} - - -static struct pipe_buffer * -st_softpipe_buffer_create(struct pipe_winsys *winsys, - unsigned alignment, - unsigned usage, - unsigned size) -{ - struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer); - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.alignment = alignment; - buffer->base.usage = usage; - buffer->base.size = size; - - buffer->data = align_malloc(size, alignment); - - return &buffer->base; -} - - -/** - * Create buffer which wraps user-space data. 
- */ -static struct pipe_buffer * -st_softpipe_user_buffer_create(struct pipe_winsys *winsys, - void *ptr, - unsigned bytes) -{ - struct st_softpipe_buffer *buffer; - - buffer = CALLOC_STRUCT(st_softpipe_buffer); - if(!buffer) - return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.size = bytes; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; -} - - -static struct pipe_buffer * -st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride) -{ - const unsigned alignment = 64; - unsigned nblocksy; - - nblocksy = util_format_get_nblocksy(format, height); - *stride = align(util_format_get_stride(format, width), alignment); - - return winsys->buffer_create(winsys, alignment, - usage, - *stride * nblocksy); -} - - -static void -st_softpipe_fence_reference(struct pipe_winsys *winsys, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ -} - - -static int -st_softpipe_fence_signalled(struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static int -st_softpipe_fence_finish(struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static void -st_softpipe_destroy(struct pipe_winsys *winsys) -{ - FREE(winsys); -} - - -static struct pipe_screen * -st_softpipe_screen_create(void) -{ - static struct pipe_winsys *winsys; - struct pipe_screen *screen; - - winsys = CALLOC_STRUCT(pipe_winsys); - if(!winsys) - return NULL; - - winsys->destroy = st_softpipe_destroy; - - winsys->buffer_create = st_softpipe_buffer_create; - winsys->user_buffer_create = st_softpipe_user_buffer_create; - winsys->buffer_map = st_softpipe_buffer_map; - winsys->buffer_unmap = st_softpipe_buffer_unmap; - winsys->buffer_destroy = st_softpipe_buffer_destroy; - - winsys->surface_buffer_create = 
st_softpipe_surface_buffer_create; - - winsys->fence_reference = st_softpipe_fence_reference; - winsys->fence_signalled = st_softpipe_fence_signalled; - winsys->fence_finish = st_softpipe_fence_finish; - - winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer; - winsys->get_name = st_softpipe_get_name; - - screen = softpipe_create_screen(winsys); - if(!screen) - st_softpipe_destroy(winsys); - - return screen; -} - - -static struct pipe_context * -st_softpipe_context_create(struct pipe_screen *screen) -{ - return softpipe_create(screen); -} - - const struct st_winsys st_softpipe_winsys = { - &st_softpipe_screen_create, - &st_softpipe_context_create, + &softpipe_create_screen_malloc, + &softpipe_create, }; diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore b/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore new file mode 100644 index 00000000000..e33609d251c --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore @@ -0,0 +1 @@ +*.png diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore b/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore new file mode 100644 index 00000000000..e33609d251c --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore @@ -0,0 +1 @@ +*.png diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c index 2f984fb7b9a..8f69ee01099 100644 --- a/src/gallium/state_trackers/vega/api_filters.c +++ b/src/gallium/state_trackers/vega/api_filters.c @@ -147,22 +147,22 @@ static void setup_constant_buffer(struct vg_context *ctx, const void *buffer, VGint param_bytes) { struct pipe_context *pipe = ctx->pipe; - struct pipe_constant_buffer *cbuf = &ctx->filter.buffer; + struct pipe_buffer **cbuf = &ctx->filter.buffer; /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering 
synchronization. */ - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); - cbuf->buffer = pipe_buffer_create(pipe->screen, 16, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); + *cbuf = pipe_buffer_create(pipe->screen, 16, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, buffer); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); } static void setup_samplers(struct vg_context *ctx, struct filter_info *info) diff --git a/src/gallium/state_trackers/vega/api_masks.c b/src/gallium/state_trackers/vega/api_masks.c index 4f9f3dae173..97cbe69205e 100644 --- a/src/gallium/state_trackers/vega/api_masks.c +++ b/src/gallium/state_trackers/vega/api_masks.c @@ -32,7 +32,6 @@ #include "vg_context.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" /* for winsys->update_buffer */ #include "util/u_pack_color.h" #include "util/u_draw_quad.h" @@ -116,8 +115,8 @@ clear_with_quad(struct vg_context *st, float x0, float y0, x1, y1); */ - if (st->pipe->winsys && st->pipe->winsys->update_buffer) - st->pipe->winsys->update_buffer( st->pipe->winsys, + if (st->pipe->screen && st->pipe->screen->update_buffer) + st->pipe->screen->update_buffer( st->pipe->screen, st->pipe->priv ); cso_save_blend(st->cso_context); diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c index 42300bb6d57..3e260e70736 100644 --- a/src/gallium/state_trackers/vega/mask.c +++ b/src/gallium/state_trackers/vega/mask.c @@ -217,7 +217,7 @@ static void setup_mask_framebuffer(struct pipe_surface *surf, static void setup_mask_operation(VGMaskOperation operation) { struct vg_context *ctx = vg_current_context(); - struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf; + 
struct pipe_buffer **cbuf = &ctx->mask.cbuf; const VGint param_bytes = 4 * sizeof(VGfloat); const VGfloat ones[4] = {1.f, 1.f, 1.f, 1.f}; void *shader = 0; @@ -225,17 +225,17 @@ static void setup_mask_operation(VGMaskOperation operation) /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); - cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 1, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, + *cbuf = pipe_buffer_create(ctx->pipe->screen, 1, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, ones); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); switch (operation) { case VG_UNION_MASK: { if (!ctx->mask.union_fs) { @@ -320,22 +320,22 @@ static void setup_mask_samplers(struct pipe_texture *umask) static void setup_mask_fill(const VGfloat color[4]) { struct vg_context *ctx = vg_current_context(); - struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf; + struct pipe_buffer **cbuf = &ctx->mask.cbuf; const VGint param_bytes = 4 * sizeof(VGfloat); /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. 
*/ - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); - cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 1, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, 0, param_bytes, color); + *cbuf = pipe_buffer_create(ctx->pipe->screen, 1, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, color); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); cso_set_fragment_shader_handle(ctx->cso_context, shaders_cache_fill(ctx->sc, VEGA_SOLID_FILL_SHADER)); diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index cc73771d358..d8f6299b2db 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -77,7 +77,8 @@ struct vg_paint { struct pipe_sampler_state sampler; } pattern; - struct pipe_constant_buffer cbuf; + /* XXX next 3 all unneded? 
*/ + struct pipe_buffer *cbuf; struct pipe_shader_state fs_state; void *fs; }; diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c index b6d282d803b..d385ee567fa 100644 --- a/src/gallium/state_trackers/vega/polygon.c +++ b/src/gallium/state_trackers/vega/polygon.c @@ -293,6 +293,7 @@ static void draw_polygon(struct vg_context *ctx, /* tell pipe about the vertex attributes */ velement.src_offset = 0; + velement.instance_divisor = 0; velement.vertex_buffer_index = 0; velement.src_format = PIPE_FORMAT_R32G32_FLOAT; velement.nr_components = COMPONENTS; diff --git a/src/gallium/state_trackers/vega/shader.c b/src/gallium/state_trackers/vega/shader.c index d9074a377b3..bd5ae79e551 100644 --- a/src/gallium/state_trackers/vega/shader.c +++ b/src/gallium/state_trackers/vega/shader.c @@ -51,7 +51,7 @@ struct shader { VGImageMode image_mode; float constants[MAX_CONSTANTS]; - struct pipe_constant_buffer cbuf; + struct pipe_buffer *cbuf; struct pipe_shader_state fs_state; void *fs; }; @@ -96,25 +96,25 @@ static void setup_constant_buffer(struct shader *shader) { struct vg_context *ctx = shader->context; struct pipe_context *pipe = shader->context->pipe; - struct pipe_constant_buffer *cbuf = &shader->cbuf; + struct pipe_buffer **cbuf = &shader->cbuf; VGint param_bytes = paint_constant_buffer_size(shader->paint); float temp_buf[MAX_CONSTANTS]; assert(param_bytes <= sizeof(temp_buf)); paint_fill_constant_buffer(shader->paint, temp_buf); - if (cbuf->buffer == NULL || + if (*cbuf == NULL || memcmp(temp_buf, shader->constants, param_bytes) != 0) { - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); memcpy(shader->constants, temp_buf, param_bytes); - cbuf->buffer = pipe_user_buffer_create(pipe->screen, - &shader->constants, - sizeof(shader->constants)); + *cbuf = pipe_user_buffer_create(pipe->screen, + &shader->constants, + sizeof(shader->constants)); } - ctx->pipe->set_constant_buffer(ctx->pipe, 
PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); } static VGint blend_bind_samplers(struct vg_context *ctx, diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c index 00d23f5c227..c16ac036e3b 100644 --- a/src/gallium/state_trackers/vega/vg_context.c +++ b/src/gallium/state_trackers/vega/vg_context.c @@ -122,8 +122,8 @@ struct vg_context * vg_create_context(struct pipe_context *pipe, void vg_destroy_context(struct vg_context *ctx) { - struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf; - struct pipe_constant_buffer *vsbuf = &ctx->vs_const_buffer; + struct pipe_buffer **cbuf = &ctx->mask.cbuf; + struct pipe_buffer **vsbuf = &ctx->vs_const_buffer; util_destroy_blit(ctx->blit); renderer_destroy(ctx->renderer); @@ -131,11 +131,11 @@ void vg_destroy_context(struct vg_context *ctx) shader_destroy(ctx->shader); paint_destroy(ctx->default_paint); - if (cbuf && cbuf->buffer) - pipe_buffer_reference(&cbuf->buffer, NULL); + if (*cbuf) + pipe_buffer_reference(cbuf, NULL); - if (vsbuf && vsbuf->buffer) - pipe_buffer_reference(&vsbuf->buffer, NULL); + if (*vsbuf) + pipe_buffer_reference(vsbuf, NULL); if (ctx->clear.fs) { cso_delete_fragment_shader(ctx->cso_context, ctx->clear.fs); @@ -371,20 +371,20 @@ void vg_validate_state(struct vg_context *ctx) 2.f/fb->width, 2.f/fb->height, 1, 1, -1, -1, 0, 0 }; - struct pipe_constant_buffer *cbuf = &ctx->vs_const_buffer; + struct pipe_buffer **cbuf = &ctx->vs_const_buffer; vg_set_viewport(ctx, VEGA_Y0_BOTTOM); - pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 16, + pipe_buffer_reference(cbuf, NULL); + *cbuf = pipe_buffer_create(ctx->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, vs_consts); } - 
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, *cbuf); } if ((ctx->state.dirty & VS_DIRTY)) { cso_set_vertex_shader_handle(ctx->cso_context, diff --git a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h index ccc8889c8c5..bc88c8d139d 100644 --- a/src/gallium/state_trackers/vega/vg_context.h +++ b/src/gallium/state_trackers/vega/vg_context.h @@ -50,7 +50,7 @@ struct st_renderbuffer { }; struct st_framebuffer { - VGint init_width, init_height; + VGint width, height; struct st_renderbuffer *strb; struct st_renderbuffer *dsrb; @@ -113,7 +113,7 @@ struct vg_context } clear; struct { - struct pipe_constant_buffer cbuf; + struct pipe_buffer *cbuf; struct pipe_sampler_state sampler; struct vg_shader *union_fs; @@ -135,7 +135,7 @@ struct vg_context struct pipe_sampler_state blend_sampler; struct { - struct pipe_constant_buffer buffer; + struct pipe_buffer *buffer; void *color_matrix_fs; } filter; struct vg_paint *default_paint; @@ -145,7 +145,7 @@ struct vg_context struct vg_shader *plain_vs; struct vg_shader *clear_vs; struct vg_shader *texture_vs; - struct pipe_constant_buffer vs_const_buffer; + struct pipe_buffer *vs_const_buffer; }; struct vg_object { diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c index ff80aab03a3..617c174eb6a 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.c +++ b/src/gallium/state_trackers/vega/vg_tracker.c @@ -193,8 +193,8 @@ struct st_framebuffer * st_create_framebuffer(const void *visual, */ stfb->alpha_mask = 0; - stfb->init_width = width; - stfb->init_height = height; + stfb->width = width; + stfb->height = height; stfb->privateData = privateData; } @@ -282,11 +282,14 @@ void st_resize_framebuffer(struct st_framebuffer *stfb, /* If this is a noop, exit early and don't do the clear, etc below. 
*/ - if (strb->width == width && - strb->height == height && + if (stfb->width == width && + stfb->height == height && state->zsbuf) return; + stfb->width = width; + stfb->height = height; + if (strb->width != width || strb->height != height) st_renderbuffer_alloc_storage(ctx, strb, width, height); diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index d80f341e6c2..8f729b565b1 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -379,14 +379,14 @@ struct xorg_renderer * renderer_create(struct pipe_context *pipe) void renderer_destroy(struct xorg_renderer *r) { - struct pipe_constant_buffer *vsbuf = &r->vs_const_buffer; - struct pipe_constant_buffer *fsbuf = &r->fs_const_buffer; + struct pipe_buffer **vsbuf = &r->vs_const_buffer; + struct pipe_buffer **fsbuf = &r->fs_const_buffer; - if (vsbuf && vsbuf->buffer) - pipe_buffer_reference(&vsbuf->buffer, NULL); + if (*vsbuf) + pipe_buffer_reference(vsbuf, NULL); - if (fsbuf && fsbuf->buffer) - pipe_buffer_reference(&fsbuf->buffer, NULL); + if (*fsbuf) + pipe_buffer_reference(fsbuf, NULL); if (r->shaders) { xorg_shaders_destroy(r->shaders); @@ -409,20 +409,20 @@ void renderer_set_constants(struct xorg_renderer *r, const float *params, int param_bytes) { - struct pipe_constant_buffer *cbuf = + struct pipe_buffer **cbuf = (shader_type == PIPE_SHADER_VERTEX) ? 
&r->vs_const_buffer : &r->fs_const_buffer; - pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(r->pipe->screen, 16, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); + pipe_buffer_reference(cbuf, NULL); + *cbuf = pipe_buffer_create(r->pipe->screen, 16, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); - if (cbuf->buffer) { - pipe_buffer_write(r->pipe->screen, cbuf->buffer, + if (*cbuf) { + pipe_buffer_write(r->pipe->screen, *cbuf, 0, param_bytes, params); } - r->pipe->set_constant_buffer(r->pipe, shader_type, 0, cbuf); + r->pipe->set_constant_buffer(r->pipe, shader_type, 0, *cbuf); } diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.h b/src/gallium/state_trackers/xorg/xorg_renderer.h index 5272cde2b3f..af6aa0567d6 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.h +++ b/src/gallium/state_trackers/xorg/xorg_renderer.h @@ -23,8 +23,8 @@ struct xorg_renderer { int fb_width; int fb_height; - struct pipe_constant_buffer vs_const_buffer; - struct pipe_constant_buffer fs_const_buffer; + struct pipe_buffer *vs_const_buffer; + struct pipe_buffer *fs_const_buffer; float buffer[BUF_SIZE]; int buffer_size; diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index 6b5a41a3727..5bf0e94b627 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -486,8 +486,11 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, int dxo, dyo; Bool hdtv; int x, y, w, h; - struct exa_pixmap_priv *dst = exaGetPixmapDriverPrivate(pPixmap); - struct pipe_surface *dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); + struct exa_pixmap_priv *dst; + struct pipe_surface *dst_surf = NULL; + + exaMoveInPixmap(pPixmap); + dst = exaGetPixmapDriverPrivate(pPixmap); if (dst && !dst->tex) { xorg_exa_set_shared_usage(pPixmap); @@ -497,6 +500,7 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, if (!dst || !dst->tex) 
XORG_FALLBACK("Xv destination %s", !dst ? "!dst" : "!dst->tex"); + dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); hdtv = ((src_w >= RES_720P_X) && (src_h >= RES_720P_Y)); REGION_TRANSLATE(pScrn->pScreen, dstRegion, -pPixmap->screen_x, @@ -516,7 +520,6 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, bind_samplers(pPriv); setup_fs_video_constants(pPriv->r, hdtv); - exaMoveInPixmap(pPixmap); DamageDamageRegion(&pPixmap->drawable, dstRegion); while (nbox--) { diff --git a/src/gallium/winsys/drm/i965/xorg/Makefile b/src/gallium/winsys/drm/i965/xorg/Makefile index d91d0006efd..c25726b0bb1 100644 --- a/src/gallium/winsys/drm/i965/xorg/Makefile +++ b/src/gallium/winsys/drm/i965/xorg/Makefile @@ -1,19 +1,25 @@ -TARGET = modesetting_drv.so -CFILES = $(wildcard ./*.c) -OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) TOP = ../../../../../.. -include $(TOP)/configs/current -INCLUDES = \ - $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \ - -I../gem \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include \ - -I$(TOP)/src/egl/main +GALLIUMDIR = $(TOP)/src/gallium + +TARGET = i965g_drv.so + +CFILES = $(wildcard ./*.c) + +include ${TOP}/configs/current + +OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) + +CFLAGS = -DHAVE_CONFIG_H \ + -g -Wall -Wimplicit-function-declaration -fPIC \ + $(shell pkg-config --cflags pixman-1 xorg-server libdrm xproto) \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/drivers \ + -I${GALLIUMDIR}/auxiliary \ + -I${TOP}/src/mesa \ + -I$(TOP)/include \ + -I$(TOP)/src/egl/main LIBS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ @@ -23,20 +29,21 @@ LIBS = \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) -DRIVER_DEFINES = \ - -DHAVE_CONFIG_H - - +TARGET_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET) ############################################# +all default: $(TARGET) 
$(TARGET_STAGING) - -all default: $(TARGET) - -$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS) +$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a $(LIBS) $(TOP)/bin/mklib -noprefix -o $@ \ $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_intel +$(TOP)/$(LIB_DIR)/gallium: + mkdir -p $@ + +$(TARGET_STAGING): $(TARGET) $(TOP)/$(LIB_DIR)/gallium + $(INSTALL) $(TARGET) $(TOP)/$(LIB_DIR)/gallium + clean: rm -rf $(OBJECTS) $(TARGET) @@ -44,14 +51,4 @@ install: $(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) $(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) - -############################################## - - -.c.o: - $(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@ - - -############################################## - .PHONY = all clean install diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 385fa857b56..b020ff38fa0 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -58,6 +58,7 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws, { struct radeon_winsys *radeon_ws = (struct radeon_winsys *)ws; struct radeon_pipe_buffer *radeon_buffer; + struct pb_desc desc; uint32_t domain; radeon_buffer = CALLOC_STRUCT(radeon_pipe_buffer); @@ -70,6 +71,14 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws, radeon_buffer->base.usage = usage; radeon_buffer->base.size = size; + if (usage == PIPE_BUFFER_USAGE_CONSTANT && is_r3xx(radeon_ws->pci_id)) { + /* Don't bother allocating a BO, as it'll never get to the card. 
*/ + desc.alignment = alignment; + desc.usage = usage; + radeon_buffer->pb = pb_malloc_buffer_create(size, &desc); + return &radeon_buffer->base; + } + domain = 0; if (usage & PIPE_BUFFER_USAGE_PIXEL) { @@ -133,8 +142,16 @@ static void radeon_buffer_del(struct pipe_buffer *buffer) struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer; - radeon_bo_unref(radeon_buffer->bo); - free(radeon_buffer); + if (radeon_buffer->pb) { + pipe_reference_init(&radeon_buffer->pb->base.reference, 0); + pb_destroy(radeon_buffer->pb); + } + + if (radeon_buffer->bo) { + radeon_bo_unref(radeon_buffer->bo); + } + + FREE(radeon_buffer); } static void *radeon_buffer_map(struct pipe_winsys *ws, @@ -146,6 +163,10 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, (struct radeon_pipe_buffer*)buffer; int write = 0; + if (radeon_buffer->pb) { + return pb_map(radeon_buffer->pb, flags); + } + if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { uint32_t domain; @@ -174,7 +195,11 @@ static void radeon_buffer_unmap(struct pipe_winsys *ws, struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer; - radeon_bo_unmap(radeon_buffer->bo); + if (radeon_buffer->pb) { + pb_unmap(radeon_buffer->pb); + } else { + radeon_bo_unmap(radeon_buffer->bo); + } } static void radeon_fence_reference(struct pipe_winsys *ws, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h index d7f17564a9f..de71cb2f42d 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h @@ -36,7 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -//#include "state_tracker/st_public.h" +#include "pipebuffer/pb_buffer.h" #include "util/u_memory.h" @@ -49,7 +49,10 @@ struct radeon_pipe_buffer { struct pipe_buffer base; + /* Pointer to GPU-backed BO. */ struct radeon_bo *bo; + /* Pointer to fallback PB buffer. 
*/ + struct pb_buffer *pb; boolean flinked; uint32_t flink; }; diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 851c2236979..9552f0ad6a9 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -40,12 +40,16 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys) struct drm_radeon_info info = {0}; int target = 0; int retval; + drmVersionPtr version; info.value = (unsigned long)&target; /* We do things in a specific order here. * - * First, the PCI ID. This is essential and should return usable numbers + * DRM version first. We need to be sure we're running on a KMS chipset. + * This is also for some features. + * + * Then, the PCI ID. This is essential and should return usable numbers * for all Radeons. If this fails, we probably got handed an FD for some * non-Radeon card. * @@ -55,8 +59,18 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys) * * The GEM info is actually bogus on the kernel side, as well as our side * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because - * we don't actually use the info for anything yet. - * XXX update the above when we can safely use vram_size instead of vram_visible */ + * we don't actually use the info for anything yet. 
*/ + + version = drmGetVersion(fd); + if (version->version_major != 2) { + fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " + "only compatible with 2.x.x\n", __FUNCTION__, + version->version_major, version->version_minor, + version->version_patchlevel); + drmFreeVersion(version); + exit(1); + } + info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); if (retval) { @@ -92,16 +106,18 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys) exit(1); } winsys->gart_size = gem_info.gart_size; - /* XXX */ - winsys->vram_size = gem_info.vram_visible; -} - -/* Guess at whether this chipset should use r300g. - * - * I believe that this check is valid, but I haven't been exhaustive. */ -static boolean is_r3xx(int pciid) -{ - return (pciid > 0x3150) && (pciid < 0x796f); + winsys->vram_size = gem_info.vram_size; + + debug_printf("radeon: Successfully grabbed chipset info from kernel!\n" + "radeon: DRM version: %d.%d.%d ID: 0x%04x GB: %d Z: %d\n" + "radeon: GART size: %d MB VRAM size: %d MB\n", + version->version_major, version->version_minor, + version->version_patchlevel, winsys->pci_id, + winsys->gb_pipes, winsys->z_pipes, + winsys->gart_size / 1024 / 1024, + winsys->vram_size / 1024 / 1024); + + drmFreeVersion(version); } /* Create a pipe_screen. */ @@ -136,12 +152,13 @@ struct pipe_context* radeon_create_context(struct drm_api* api, } boolean radeon_buffer_from_texture(struct drm_api* api, + struct pipe_screen* screen, struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride) { /* XXX fix this */ - return r300_get_texture_buffer(texture, buffer, stride); + return r300_get_texture_buffer(screen, texture, buffer, stride); } /* Create a buffer from a handle. 
*/ @@ -208,7 +225,7 @@ static boolean radeon_shared_handle_from_texture(struct drm_api *api, struct radeon_pipe_buffer* radeon_buffer; struct pipe_buffer *buffer = NULL; - if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { + if (!radeon_buffer_from_texture(api, screen, texture, &buffer, stride)) { return FALSE; } @@ -240,7 +257,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api, unsigned *handle) { struct pipe_buffer *buffer = NULL; - if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { + if (!radeon_buffer_from_texture(api, screen, texture, &buffer, stride)) { return FALSE; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h index bf0e78138d7..ddd7983824a 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h @@ -56,6 +56,7 @@ struct pipe_context* radeon_create_context(struct drm_api* api, struct pipe_screen* screen); boolean radeon_buffer_from_texture(struct drm_api* api, + struct pipe_screen* screen, struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); @@ -76,4 +77,13 @@ boolean radeon_global_handle_from_buffer(struct drm_api* api, unsigned* handle); void radeon_destroy_drm_api(struct drm_api* api); + +/* Guess at whether this chipset should use r300g. + * + * I believe that this check is valid, but I haven't been exhaustive. */ +static boolean is_r3xx(int pciid) +{ + return (pciid > 0x3150) && (pciid < 0x796f); +} + #endif diff --git a/src/gallium/winsys/drm/radeon/dri/Makefile b/src/gallium/winsys/drm/radeon/dri/Makefile index a9889444de8..eaa34180321 100644 --- a/src/gallium/winsys/drm/radeon/dri/Makefile +++ b/src/gallium/winsys/drm/radeon/dri/Makefile @@ -2,7 +2,7 @@ TOP = ../../../../../.. 
include $(TOP)/configs/current -LIBNAME = radeon_dri.so +LIBNAME = radeong_dri.so MINIGLX_SOURCES = diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 420dccc92c9..1d9bac3871c 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -274,7 +274,7 @@ xlib_eglTerminate(_EGLDriver *drv, _EGLDisplay *dpy) static _EGLProc -xlib_eglGetProcAddress(const char *procname) +xlib_eglGetProcAddress(_EGLDriver *drv, const char *procname) { return (_EGLProc) st_get_proc_address(procname); } diff --git a/src/glsl/cl/sl_cl_parse.c b/src/glsl/cl/sl_cl_parse.c index e9b3707ac12..e256ab82138 100644 --- a/src/glsl/cl/sl_cl_parse.c +++ b/src/glsl/cl/sl_cl_parse.c @@ -345,7 +345,7 @@ struct parse_state { }; -static __inline unsigned int +static unsigned int _emit(struct parse_context *ctx, unsigned int *out, unsigned char b) diff --git a/src/glu/sgi/libnurbs/interface/bezierPatchMesh.h b/src/glu/sgi/libnurbs/interface/bezierPatchMesh.h index 449329665cc..ba6868a3063 100644 --- a/src/glu/sgi/libnurbs/interface/bezierPatchMesh.h +++ b/src/glu/sgi/libnurbs/interface/bezierPatchMesh.h @@ -33,6 +33,7 @@ #ifndef _BEZIERPATCHMESH_H #define _BEZIERPATCHMESH_H +#include <GL/gl.h> #include "bezierPatch.h" typedef struct bezierPatchMesh{ diff --git a/src/glu/sgi/libnurbs/interface/glsurfeval.h b/src/glu/sgi/libnurbs/interface/glsurfeval.h index 1567c6b098c..621e59391aa 100644 --- a/src/glu/sgi/libnurbs/interface/glsurfeval.h +++ b/src/glu/sgi/libnurbs/interface/glsurfeval.h @@ -83,7 +83,7 @@ typedef struct surfEvalMachine{ class StoredVertex { public: - StoredVertex() { type = 0; } + StoredVertex() { type = 0; coord[0] = 0; coord[1] = 0; point[0] = 0; point[1] = 0; } ~StoredVertex(void) {} void saveEvalCoord(REAL x, REAL y) {coord[0] = x; coord[1] = y; type = TYPECOORD; } diff --git a/src/glu/sgi/libnurbs/internals/mesher.cc b/src/glu/sgi/libnurbs/internals/mesher.cc index 9cc436adbf3..b2d83f41288 100644 --- 
a/src/glu/sgi/libnurbs/internals/mesher.cc +++ b/src/glu/sgi/libnurbs/internals/mesher.cc @@ -58,6 +58,9 @@ Mesher::Mesher( Backend& b ) { stacksize = 0; vdata = 0; + last[0] = 0; + last[1] = 0; + itop = 0; lastedge = 0; //needed to prevent purify UMR } diff --git a/src/glu/sgi/libnurbs/internals/reader.cc b/src/glu/sgi/libnurbs/internals/reader.cc index 6135eef60ee..c59240d26ab 100644 --- a/src/glu/sgi/libnurbs/internals/reader.cc +++ b/src/glu/sgi/libnurbs/internals/reader.cc @@ -64,6 +64,7 @@ O_pwlcurve::O_pwlcurve( long _type, long count, INREAL *array, long byte_stride, owner = 0; pts = trimpts; npts = (int) count; + save = 0; int i; /* copy user data into internal trimming data structures */ @@ -115,6 +116,7 @@ O_pwlcurve::O_pwlcurve( long _type, long count, INREAL *array, long byte_stride, owner = 0; pts = trimpts; npts = (int) count; + save = 0; /* copy user data into internal trimming data structures */ switch( _type ) { diff --git a/src/glu/sgi/libnurbs/internals/renderhints.cc b/src/glu/sgi/libnurbs/internals/renderhints.cc index a3aa62d42cf..7025f74f5b6 100644 --- a/src/glu/sgi/libnurbs/internals/renderhints.cc +++ b/src/glu/sgi/libnurbs/internals/renderhints.cc @@ -54,6 +54,10 @@ Renderhints::Renderhints() errorchecking = N_MSG; subdivisions = 6.0; tmp1 = 0.0; + displaydomain = 0; + maxsubdivisions = (int) subdivisions; + wiretris = 0; + wirequads = 0; } void diff --git a/src/glu/sgi/libnurbs/internals/simplemath.h b/src/glu/sgi/libnurbs/internals/simplemath.h index 0a060c57ead..d00062dc700 100644 --- a/src/glu/sgi/libnurbs/internals/simplemath.h +++ b/src/glu/sgi/libnurbs/internals/simplemath.h @@ -38,6 +38,8 @@ /* simple inline routines */ +#include "types.h" + inline int max( int x, int y ) { return ( x < y ) ? 
y : x; } diff --git a/src/glu/sgi/libnurbs/internals/slicer.cc b/src/glu/sgi/libnurbs/internals/slicer.cc index 27d2a650d11..1b18d73c176 100644 --- a/src/glu/sgi/libnurbs/internals/slicer.cc +++ b/src/glu/sgi/libnurbs/internals/slicer.cc @@ -1181,6 +1181,10 @@ void Slicer::slice(Arc_ptr loop) Slicer::Slicer( Backend &b ) : CoveAndTiler( b ), Mesher( b ), backend( b ) { + oneOverDu = 0; + du = 0; + dv = 0; + isolines = 0; ulinear = 0; vlinear = 0; } diff --git a/src/glx/x11/dri2_glx.c b/src/glx/x11/dri2_glx.c index 83149062f31..7b0c52b50d4 100644 --- a/src/glx/x11/dri2_glx.c +++ b/src/glx/x11/dri2_glx.c @@ -205,14 +205,14 @@ static int dri2DrawableGetMSC(__GLXscreenConfigs *psc, __GLXDRIdrawable *pdraw, int64_t *ust, int64_t *msc, int64_t *sbc) { - return DRI2GetMSC(psc->dpy, pdraw->drawable, ust, msc, sbc); + return DRI2GetMSC(psc->dpy, pdraw->xDrawable, ust, msc, sbc); } static int dri2WaitForMSC(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor, int64_t remainder, int64_t *ust, int64_t *msc, int64_t *sbc) { - return DRI2WaitMSC(pdraw->psc->dpy, pdraw->drawable, target_msc, divisor, + return DRI2WaitMSC(pdraw->psc->dpy, pdraw->xDrawable, target_msc, divisor, remainder, ust, msc, sbc); } @@ -220,7 +220,7 @@ static int dri2WaitForSBC(__GLXDRIdrawable *pdraw, int64_t target_sbc, int64_t *ust, int64_t *msc, int64_t *sbc) { - return DRI2WaitSBC(pdraw->psc->dpy, pdraw->drawable, target_sbc, ust, msc, + return DRI2WaitSBC(pdraw->psc->dpy, pdraw->xDrawable, target_sbc, ust, msc, sbc); } @@ -381,7 +381,7 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor, return 0; } - DRI2SwapBuffers(pdraw->psc->dpy, pdraw->drawable, target_msc, divisor, + DRI2SwapBuffers(pdraw->psc->dpy, pdraw->xDrawable, target_msc, divisor, remainder, &ret); #if __DRI2_FLUSH_VERSION >= 2 @@ -575,11 +575,19 @@ dri2CreateScreen(__GLXscreenConfigs * psc, int screen, psp->swapBuffers = dri2SwapBuffers; psp->waitGL = dri2WaitGL; psp->waitX = dri2WaitX; - 
psp->getDrawableMSC = dri2DrawableGetMSC; - psp->waitForMSC = dri2WaitForMSC; - psp->waitForSBC = dri2WaitForSBC; - psp->setSwapInterval = dri2SetSwapInterval; - psp->getSwapInterval = dri2GetSwapInterval; + if (pdp->driMinor >= 2) { + psp->getDrawableMSC = dri2DrawableGetMSC; + psp->waitForMSC = dri2WaitForMSC; + psp->waitForSBC = dri2WaitForSBC; + psp->setSwapInterval = dri2SetSwapInterval; + psp->getSwapInterval = dri2GetSwapInterval; + } else { + psp->getDrawableMSC = NULL; + psp->waitForMSC = NULL; + psp->waitForSBC = NULL; + psp->setSwapInterval = NULL; + psp->getSwapInterval = NULL; + } /* DRI2 suports SubBuffer through DRI2CopyRegion, so it's always * available.*/ diff --git a/src/glx/x11/indirect.c b/src/glx/x11/indirect.c index ea90ce44630..262637a9473 100644 --- a/src/glx/x11/indirect.c +++ b/src/glx/x11/indirect.c @@ -47,7 +47,7 @@ # else # define FASTCALL # endif -# if defined(__GNUC__) +# if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define NOINLINE __attribute__((noinline)) # else # define NOINLINE diff --git a/src/glx/x11/indirect.h b/src/glx/x11/indirect.h index 19a8c0d134a..9e73b338184 100644 --- a/src/glx/x11/indirect.h +++ b/src/glx/x11/indirect.h @@ -37,7 +37,7 @@ * \author Ian Romanick <[email protected]> */ -# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__) +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__) # define HIDDEN __attribute__((visibility("hidden"))) # else # define HIDDEN @@ -47,7 +47,7 @@ # else # define FASTCALL # endif -# if defined(__GNUC__) +# if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define NOINLINE __attribute__((noinline)) # else # define NOINLINE diff --git a/src/glx/x11/indirect_size.c b/src/glx/x11/indirect_size.c index cdaf02ffe6a..f8541b5758b 100644 --- a/src/glx/x11/indirect_size.c +++ b/src/glx/x11/indirect_size.c @@ -29,7 +29,7 @@ #include 
<GL/gl.h> #include "indirect_size.h" -# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) +# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PURE __attribute__((pure)) # else # define PURE @@ -41,7 +41,7 @@ # define FASTCALL # endif -# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__) +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__) # define INTERNAL __attribute__((visibility("internal"))) # else # define INTERNAL diff --git a/src/glx/x11/indirect_size.h b/src/glx/x11/indirect_size.h index 9ba0bd69075..af0919f9645 100644 --- a/src/glx/x11/indirect_size.h +++ b/src/glx/x11/indirect_size.h @@ -36,7 +36,7 @@ * \author Ian Romanick <[email protected]> */ -# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) +# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PURE __attribute__((pure)) # else # define PURE @@ -48,7 +48,7 @@ # define FASTCALL # endif -# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__) +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__) # define INTERNAL __attribute__((visibility("internal"))) # else # define INTERNAL diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index bac1c3a49c3..016f27a6a31 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -295,8 +295,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), &brw->cc.vp_bo, 1, - &cc, sizeof(cc), - NULL, NULL); + &cc, sizeof(cc)); /* Emit CC viewport relocation */ dri_bo_emit_reloc(bo, diff --git a/src/mesa/drivers/dri/i965/brw_clip.c 
b/src/mesa/drivers/dri/i965/brw_clip.c index af1d975de95..d3275c7a89d 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -130,13 +130,14 @@ static void compile_clip_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_upload_cache( &brw->cache, - BRW_CLIP_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->clip.prog_data ); + brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->clip.prog_data); } /* Calculate interpolants for triangle and line rasterization. diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index c8f24a94e4d..22df7722b68 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -143,8 +143,7 @@ clip_unit_create_from_key(struct brw_context *brw, bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, key, sizeof(*key), &brw->clip.prog_bo, 1, - &clip, sizeof(clip), - NULL, NULL); + &clip, sizeof(clip)); /* Emit clip program relocation */ assert(brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0dd3087143f..79818b92b75 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -131,7 +131,6 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_PSP 0x800 #define BRW_NEW_WM_SURFACES 0x1000 -#define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 /** @@ -332,7 +331,6 @@ struct brw_cache { struct brw_cache_item **items; GLuint size, n_items; - GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; /* Record of the last BOs chosen for each cache_id. 
Used to set @@ -583,6 +581,7 @@ struct brw_context struct { struct brw_vs_prog_data *prog_data; + int8_t *constant_map; /* variable array following prog_data */ dri_bo *prog_bo; dri_bo *state_bo; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 190310afbb0..22e3e732f40 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -256,13 +256,24 @@ static void prepare_constant_buffer(struct brw_context *brw) */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - /* XXX just use a memcpy here */ - for (i = 0; i < nr; i++) { - const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; - buf[offset + i * 4 + 0] = value[0]; - buf[offset + i * 4 + 1] = value[1]; - buf[offset + i * 4 + 2] = value[2]; - buf[offset + i * 4 + 3] = value[3]; + if (vp->use_const_buffer) { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } + } + } else { + for (i = 0; i < nr; i++) { + memcpy(buf + offset + i * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 1bc3eccf49d..7261b316c10 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -125,12 +125,13 @@ static void compile_gs_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->gs.prog_data ); + brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG, + 
&c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->gs.prog_data); } static const GLenum gs_prim[GL_POLYGON+1] = { diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 1af5790a676..7d5a944bf7d 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -108,8 +108,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, key, sizeof(*key), &brw->gs.prog_bo, 1, - &gs, sizeof(gs), - NULL, NULL); + &gs, sizeof(gs)); if (key->prog_active) { /* Emit GS program relocation */ diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 968890f7fb1..8e6839b8120 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -117,12 +117,13 @@ static void compile_sf_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->sf.prog_data ); + brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->sf.prog_data); } /* Calculate interpolants for triangle and line rasterization. 
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 09223b7cfb1..b9b42cd6d55 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -309,8 +309,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, key, sizeof(*key), reloc_bufs, 2, - &sf, sizeof(sf), - NULL, NULL); + &sf, sizeof(sf)); /* STATE_PREFETCH command description describes this state as being * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 9c9d145c4b6..536fe8b249b 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -124,16 +124,26 @@ dri_bo *brw_cache_data(struct brw_cache *cache, dri_bo **reloc_bufs, GLuint nr_reloc_bufs); -dri_bo *brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_sz, - const void *aux, - void *aux_return ); +drm_intel_bo *brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz); + +drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz, + const void *aux, + GLuint aux_sz, + void *aux_return); dri_bo *brw_search_cache( struct brw_cache *cache, enum brw_cache_id cache_id, diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index e4c9ba7d870..5fc47b0420a 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -71,25 
+71,23 @@ static GLuint -hash_key(const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +hash_key(struct brw_cache_item *item) { - GLuint *ikey = (GLuint *)key; - GLuint hash = 0, i; + GLuint *ikey = (GLuint *)item->key; + GLuint hash = item->cache_id, i; - assert(key_size % 4 == 0); + assert(item->key_size % 4 == 0); /* I'm sure this can be improved on: */ - for (i = 0; i < key_size/4; i++) { + for (i = 0; i < item->key_size/4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); } /* Include the BO pointers as key data as well */ - ikey = (GLuint *)reloc_bufs; - key_size = nr_reloc_bufs * sizeof(dri_bo *); - for (i = 0; i < key_size/4; i++) { + ikey = (GLuint *)item->reloc_bufs; + for (i = 0; i < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); } @@ -114,11 +112,22 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } +static int +brw_cache_item_equals(const struct brw_cache_item *a, + const struct brw_cache_item *b) +{ + return a->cache_id == b->cache_id && + a->hash == b->hash && + a->key_size == b->key_size && + (memcmp(a->key, b->key, a->key_size) == 0) && + a->nr_reloc_bufs == b->nr_reloc_bufs && + (memcmp(a->reloc_bufs, b->reloc_bufs, + a->nr_reloc_bufs * sizeof(dri_bo *)) == 0); +} static struct brw_cache_item * -search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - GLuint hash, const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +search_cache(struct brw_cache *cache, GLuint hash, + struct brw_cache_item *lookup) { struct brw_cache_item *c; @@ -133,13 +142,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, #endif for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->cache_id == cache_id && - c->hash == hash && - c->key_size == key_size && - memcmp(c->key, key, key_size) == 0 && - c->nr_reloc_bufs == nr_reloc_bufs && - 
memcmp(c->reloc_bufs, reloc_bufs, - nr_reloc_bufs * sizeof(dri_bo *)) == 0) + if (brw_cache_item_equals(lookup, c)) return c; } @@ -182,10 +185,18 @@ brw_search_cache(struct brw_cache *cache, void *aux_return) { struct brw_cache_item *item; - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + struct brw_cache_item lookup; + GLuint hash; - item = search_cache(cache, cache_id, hash, key, key_size, - reloc_bufs, nr_reloc_bufs); + lookup.cache_id = cache_id; + lookup.key = key; + lookup.key_size = key_size; + lookup.reloc_bufs = reloc_bufs; + lookup.nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); if (item == NULL) return NULL; @@ -200,26 +211,34 @@ brw_search_cache(struct brw_cache *cache, } -dri_bo * -brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_size, - const void *aux, - void *aux_return ) +drm_intel_bo * +brw_upload_cache_with_auxdata(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size, + const void *aux, + GLuint aux_size, + void *aux_return) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + GLuint hash; GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *); - GLuint aux_size = cache->aux_size[cache_id]; void *tmp; dri_bo *bo; int i; + item->cache_id = cache_id; + item->key = key; + item->key_size = key_size; + item->reloc_bufs = reloc_bufs; + item->nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(item); + item->hash = hash; + /* Create the buffer object to contain the data */ bo = dri_bo_alloc(cache->brw->intel.bufmgr, cache->name[cache_id], data_size, 1 << 6); @@ -229,19 +248,15 @@ brw_upload_cache( struct brw_cache 
*cache, tmp = _mesa_malloc(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); - memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy(tmp + key_size, aux, aux_size); memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); for (i = 0; i < nr_reloc_bufs; i++) { if (reloc_bufs[i] != NULL) dri_bo_reference(reloc_bufs[i]); } - item->cache_id = cache_id; item->key = tmp; - item->hash = hash; - item->key_size = key_size; item->reloc_bufs = tmp + key_size + aux_size; - item->nr_reloc_bufs = nr_reloc_bufs; item->bo = bo; dri_bo_reference(bo); @@ -255,7 +270,6 @@ brw_upload_cache( struct brw_cache *cache, cache->n_items++; if (aux_return) { - assert(cache->aux_size[cache_id]); *(void **)aux_return = (void *)((char *)item->key + item->key_size); } @@ -272,6 +286,23 @@ brw_upload_cache( struct brw_cache *cache, return bo; } +drm_intel_bo * +brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size) +{ + return brw_upload_cache_with_auxdata(cache, cache_id, + key, key_size, + reloc_bufs, nr_reloc_bufs, + data, data_size, + NULL, 0, + NULL); +} /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. 
@@ -292,11 +323,18 @@ brw_cache_data(struct brw_cache *cache, GLuint nr_reloc_bufs) { dri_bo *bo; - struct brw_cache_item *item; - GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); - - item = search_cache(cache, cache_id, hash, data, data_size, - reloc_bufs, nr_reloc_bufs); + struct brw_cache_item *item, lookup; + GLuint hash; + + lookup.cache_id = cache_id; + lookup.key = data; + lookup.key_size = data_size; + lookup.reloc_bufs = reloc_bufs; + lookup.nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); if (item) { update_cache_last(cache, cache_id, item->bo); dri_bo_reference(item->bo); @@ -306,8 +344,7 @@ brw_cache_data(struct brw_cache *cache, bo = brw_upload_cache(cache, cache_id, data, data_size, reloc_bufs, nr_reloc_bufs, - data, data_size, - NULL, NULL); + data, data_size); return bo; } @@ -321,11 +358,9 @@ enum pool_type { static void brw_init_cache_id(struct brw_cache *cache, const char *name, - enum brw_cache_id id, - GLuint aux_size) + enum brw_cache_id id) { cache->name[id] = strdup(name); - cache->aux_size[id] = aux_size; } @@ -341,80 +376,28 @@ brw_init_non_surface_cache(struct brw_context *brw) cache->items = (struct brw_cache_item **) _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(cache, - "CC_VP", - BRW_CC_VP, - 0); - - brw_init_cache_id(cache, - "CC_UNIT", - BRW_CC_UNIT, - 0); - - brw_init_cache_id(cache, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_data)); - - brw_init_cache_id(cache, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - 0); - - brw_init_cache_id(cache, - "SAMPLER", - BRW_SAMPLER, - 0); - - brw_init_cache_id(cache, - "WM_UNIT", - BRW_WM_UNIT, - 0); - - brw_init_cache_id(cache, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_data)); - - brw_init_cache_id(cache, - "SF_VP", - BRW_SF_VP, - 0); - - brw_init_cache_id(cache, - "SF_UNIT", - BRW_SF_UNIT, - 0); - - brw_init_cache_id(cache, - 
"VS_UNIT", - BRW_VS_UNIT, - 0); - - brw_init_cache_id(cache, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_data)); - - brw_init_cache_id(cache, - "CLIP_UNIT", - BRW_CLIP_UNIT, - 0); - - brw_init_cache_id(cache, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_data)); - - brw_init_cache_id(cache, - "GS_UNIT", - BRW_GS_UNIT, - 0); - - brw_init_cache_id(cache, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_data)); + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP); + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT); + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG); + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR); + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER); + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT); + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG); + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP); + + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT); + + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT); + + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG); + + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT); + + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG); + + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT); + + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG); } @@ -430,15 +413,8 @@ brw_init_surface_cache(struct brw_context *brw) cache->items = (struct brw_cache_item **) _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(cache, - "SS_SURFACE", - BRW_SS_SURFACE, - 0); - - brw_init_cache_id(cache, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - 0); + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE); + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index af8dfb4c15c..0ecbef1ef92 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -36,13 +36,7 @@ #include "intel_batchbuffer.h" 
#include "intel_buffers.h" -/* This is used to initialize brw->state.atoms[]. We could use this - * list directly except for a single atom, brw_constant_buffer, which - * has a .dirty value which changes according to the parameters of the - * current fragment and vertex programs, and so cannot be a static - * value. - */ -const struct brw_tracked_state *atoms[] = +static const struct brw_tracked_state *atoms[] = { &brw_check_fallback, @@ -208,7 +202,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_FENCE), DEFINE_BIT(BRW_NEW_INDICES), DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index fd055e225e0..44b085e214b 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -35,6 +35,7 @@ #include "brw_util.h" #include "brw_state.h" #include "shader/prog_print.h" +#include "shader/prog_parameter.h" @@ -42,9 +43,11 @@ static void do_vs_prog( struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key ) { + GLcontext *ctx = &brw->intel.ctx; GLuint program_size; const GLuint *program; struct brw_vs_compile c; + int aux_size; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -73,13 +76,27 @@ static void do_vs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + /* We upload from &c.prog_data including the constant_map assuming + * they're packed together. It would be nice to have a + * compile-time assert macro here. 
+ */ + assert(c.constant_map == (int8_t *)&c.prog_data + + sizeof(c.prog_data)); + assert(ctx->Const.VertexProgram.MaxNativeParameters == + ARRAY_SIZE(c.constant_map)); + + aux_size = sizeof(c.prog_data); + if (c.vp->use_const_buffer) + aux_size += c.vp->program.Base.Parameters->NumParameters; + dri_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->vs.prog_data ); + brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + aux_size, + &brw->vs.prog_data); } @@ -109,6 +126,8 @@ static void brw_upload_vs_prog(struct brw_context *brw) &brw->vs.prog_data); if (brw->vs.prog_bo == NULL) do_vs_prog(brw, vp, &key); + brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + + sizeof(*brw->vs.prog_data)); } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 4a591365c98..95e0501b1eb 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -51,6 +51,7 @@ struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; + int8_t constant_map[1024]; struct brw_vertex_program *vp; @@ -81,6 +82,8 @@ struct brw_vs_compile { GLint index; struct brw_reg reg; } current_const[3]; + + GLboolean needs_stack; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 1b84dd505f6..52cc04fee87 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -104,9 +104,47 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ if (c->vp->use_const_buffer) { - /* get constants from a real constant buffer */ - c->prog_data.curb_read_length = 0; - c->prog_data.nr_params = 4; /* XXX 
0 causes a bug elsewhere... */ + int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; + int constant = 0; + + /* We've got more constants than we can load with the push + * mechanism. This is often correlated with reladdr loads where + * we should probably be using a pull mechanism anyway to avoid + * excessive reading. However, the pull mechanism is slow in + * general. So, we try to allocate as many non-reladdr-loaded + * constants through the push buffer as we can before giving up. + */ + memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); + for (i = 0; + i < c->vp->program.Base.NumInstructions && constant < max_constant; + i++) { + struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; + int arg; + + for (arg = 0; arg < 3 && constant < max_constant; arg++) { + if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR && + inst->SrcReg[arg].File != PROGRAM_CONSTANT && + inst->SrcReg[arg].File != PROGRAM_UNIFORM && + inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && + inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) || + inst->SrcReg[arg].RelAddr) + continue; + + if (c->constant_map[inst->SrcReg[arg].Index] == -1) { + c->constant_map[inst->SrcReg[arg].Index] = constant++; + } + } + } + + for (i = 0; i < constant; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, + (i%2) * 4), + 0, 4, 1); + } + reg += (constant + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + /* XXX 0 causes a bug elsewhere... 
*/ + c->prog_data.nr_params = MAX2(constant * 4, 4); } else { /* use a section of the GRF for constants */ @@ -214,8 +252,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } } - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); - reg += 2; + if (c->needs_stack) { + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + } /* Some opcodes need an internal temporary: */ @@ -762,15 +802,14 @@ get_constant(struct brw_vs_compile *c, { const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; - struct brw_reg const_reg; - struct brw_reg const2_reg; - const GLboolean relAddr = src->RelAddr; + struct brw_reg const_reg = c->current_const[argIndex].reg; assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index || relAddr) { + if (c->current_const[argIndex].index != src->Index) { struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + /* Keep track of the last constant loaded in this slot, for reuse. */ c->current_const[argIndex].index = src->Index; #if 0 @@ -779,48 +818,74 @@ get_constant(struct brw_vs_compile *c, #endif /* need to fetch the constant now */ brw_dp_READ_4_vs(p, - c->current_const[argIndex].reg,/* writeback dest */ + const_reg, /* writeback dest */ 0, /* oword */ - relAddr, /* relative indexing? */ + 0, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); - - if (relAddr) { - /* second read */ - const2_reg = get_tmp(c); - - /* use upper half of address reg for second read */ - addrReg = stride(addrReg, 0, 4, 0); - addrReg.subnr = 16; - - brw_dp_READ_4_vs(p, - const2_reg, /* writeback dest */ - 1, /* oword */ - relAddr, /* relative indexing? 
*/ - addrReg, /* address register */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_VERT_CONST_BUFFER - ); - } } - const_reg = c->current_const[argIndex].reg; + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; - if (relAddr) { - /* merge the two Owords into the constant register */ - /* const_reg[7..4] = const2_reg[7..4] */ - brw_MOV(p, - suboffset(stride(const_reg, 0, 4, 1), 4), - suboffset(stride(const2_reg, 0, 4, 1), 4)); - release_tmp(c, const2_reg); - } - else { - /* replicate lower four floats into upper half (to get XYZWXYZW) */ - const_reg = stride(const_reg, 0, 4, 0); - const_reg.subnr = 0; - } + return const_reg; +} + +static struct brw_reg +get_reladdr_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg = c->current_const[argIndex].reg; + struct brw_reg const2_reg; + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + assert(argIndex < 3); + + /* Can't reuse a reladdr constant load. */ + c->current_const[argIndex].index = -1; + + #if 0 + printf(" fetch const[a0.x+%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + + /* fetch the first vec4 */ + brw_dp_READ_4_vs(p, + const_reg, /* writeback dest */ + 0, /* oword */ + 1, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + /* second vec4 */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + 1, /* relative indexing? 
*/ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); return const_reg; } @@ -928,7 +993,13 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: if (c->vp->use_const_buffer) { - return get_constant(c, inst, argIndex); + if (!relAddr && c->constant_map[index] != -1) { + assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); + return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; + } else if (relAddr) + return get_reladdr_constant(c, inst, argIndex); + else + return get_constant(c, inst, argIndex); } else if (relAddr) { return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); @@ -1380,12 +1451,14 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); - - /* Message registers can't be read, so copy the output into GRF register - if they are used in source registers */ + for (insn = 0; insn < nr_insns; insn++) { GLuint i; struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + + /* Message registers can't be read, so copy the output into GRF + * register if they are used in source registers + */ for (i = 0; i < 3; i++) { struct prog_src_register *src = &inst->SrcReg[i]; GLuint index = src->Index; @@ -1393,12 +1466,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) c->output_regs[index].used_in_src = GL_TRUE; } + + switch (inst->Opcode) { + case OPCODE_CAL: + case OPCODE_RET: + c->needs_stack = GL_TRUE; + break; + default: + break; + } } /* Static register allocation */ brw_vs_alloc_regs(c); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + if 
(c->needs_stack) + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (insn = 0; insn < nr_insns; insn++) { diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 345ffa7ee11..fd9f2fee423 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -164,8 +164,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, key, sizeof(*key), &brw->vs.prog_bo, 1, - &vs, sizeof(vs), - NULL, NULL); + &vs, sizeof(vs)); /* Emit VS program relocation */ dri_bo_emit_reloc(bo, diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 3bc9840a971..3f6e16fcb0a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -168,8 +168,7 @@ brw_vs_get_binding_table(struct brw_context *brw) bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->vs.surf_bo, BRW_VS_MAX_SURF, - data, data_size, - NULL, NULL); + data, data_size); /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_VS_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 72749b3859d..bb7a2938123 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -172,12 +172,6 @@ static void brw_new_batch( struct intel_context *intel ) } } - -static void brw_note_fence( struct intel_context *intel, GLuint fence ) -{ - brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; -} - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -193,7 +187,6 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.update_texture_state = 0; brw->intel.vtbl.invalidate_state = brw_invalidate_state; - brw->intel.vtbl.note_fence = brw_note_fence; 
brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 6895f644104..fb24379c90b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -199,12 +199,13 @@ static void do_wm_prog( struct brw_context *brw, program = brw_get_program(&c->func, &program_size); dri_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - NULL, 0, - program, program_size, - &c->prog_data, - &brw->wm.prog_data ); + brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + sizeof(c->prog_data), + &brw->wm.prog_data); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index ad267a4e6ae..87387b1e2d5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -326,8 +326,7 @@ static void upload_wm_samplers( struct brw_context *brw ) brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, &key, sizeof(key), brw->wm.sdc_bo, key.sampler_count, - &sampler, sizeof(sampler), - NULL, NULL); + &sampler, sizeof(sampler)); /* Emit SDC relocations */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index d3373ea79e8..a7f80db554a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -210,8 +210,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, key, sizeof(*key), reloc_bufs, 3, - &wm, sizeof(wm), - NULL, NULL); + &wm, sizeof(wm)); /* Emit WM program relocation */ dri_bo_emit_reloc(bo, diff 
--git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index f26cfabb7df..357c8c90de3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -256,8 +256,7 @@ brw_create_texture_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); + &surf, sizeof(surf)); if (key->bo) { /* Emit relocation to surface contents */ @@ -351,8 +350,7 @@ brw_create_constant_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); + &surf, sizeof(surf)); if (key->bo) { /* Emit relocation to surface contents. Section 5.1.1 of the gen4 @@ -653,8 +651,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, BRW_SS_SURFACE, &key, sizeof(key), &region_bo, 1, - &surf, sizeof(surf), - NULL, NULL); + &surf, sizeof(surf)); if (region_bo != NULL) { /* We might sample from it, and we might render to it, so flag * them both. 
We might be able to figure out from other state @@ -701,8 +698,7 @@ brw_wm_get_binding_table(struct brw_context *brw) bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, - data, data_size, - NULL, NULL); + data, data_size); /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_WM_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 3f6634c65a7..d52fe2eef2c 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -506,27 +506,7 @@ intelFlush(GLcontext * ctx) static void intel_glFlush(GLcontext *ctx) { - struct intel_context *intel = intel_context(ctx); - intel_flush(ctx, GL_TRUE); - - /* We're using glFlush as an indicator that a frame is done, which is - * what DRI2 does before calling SwapBuffers (and means we should catch - * people doing front-buffer rendering, as well).. - * - * Wait for the swapbuffers before the one we just emitted, so we don't - * get too many swaps outstanding for apps that are GPU-heavy but not - * CPU-heavy. - * - * Unfortunately, we don't have a handle to the batch containing the swap, - * and getting our hands on that doesn't seem worth it, so we just us the - * first batch we emitted after the last swap. 
- */ - if (intel->first_post_swapbuffers_batch != NULL) { - drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); - drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); - intel->first_post_swapbuffers_batch = NULL; - } } void diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 07207bfbec9..6ba281cc144 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -107,7 +107,6 @@ struct intel_context void (*finish_batch) (struct intel_context * intel); void (*new_batch) (struct intel_context * intel); void (*emit_invarient_state) (struct intel_context * intel); - void (*note_fence) (struct intel_context *intel, GLuint fence); void (*update_texture_state) (struct intel_context * intel); void (*render_start) (struct intel_context * intel); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e240957197d..6c2cb3b57e2 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -128,8 +128,29 @@ intelDRI2Flush(__DRIdrawable *drawable) static void intelDRI2FlushInvalidate(__DRIdrawable *drawable) { + struct intel_context *intel = drawable->driContextPriv->driverPrivate; + intelDRI2Flush(drawable); drawable->validBuffers = GL_FALSE; + + /* We're using FlushInvalidate as an indicator that a frame is + * done. It's only called immediately after SwapBuffers, so it + * won't affect front-buffer rendering or applications explicitly + * managing swap regions using MESA_copy_buffer. + * + * Wait for the swapbuffers before the one we just emitted, so we don't + * get too many swaps outstanding for apps that are GPU-heavy but not + * CPU-heavy. + * + * Unfortunately, we don't have a handle to the batch containing the swap, + * and getting our hands on that doesn't seem worth it, so we just use the + * first batch we emitted after the last swap. 
+ */ + if (intel->first_post_swapbuffers_batch != NULL) { + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + } } static const struct __DRI2flushExtensionRec intelFlushExtension = { diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index 8212dc12031..ca33faff87f 100644 --- a/src/mesa/drivers/dri/r200/Makefile +++ b/src/mesa/drivers/dri/r200/Makefile @@ -29,8 +29,8 @@ RADEON_COMMON_SOURCES = \ radeon_mipmap_tree.c \ radeon_queryobj.c \ radeon_span.c \ - radeon_texture.c - + radeon_texture.c \ + radeon_tex_copy.c DRIVER_SOURCES = r200_context.c \ r200_ioctl.c \ @@ -46,6 +46,7 @@ DRIVER_SOURCES = r200_context.c \ r200_sanity.c \ r200_fragshader.c \ r200_vertprog.c \ + r200_blit.c \ radeon_screen.c \ $(EGL_SOURCES) \ $(RADEON_COMMON_SOURCES) \ diff --git a/src/mesa/drivers/dri/r200/r200_blit.c b/src/mesa/drivers/dri/r200/r200_blit.c new file mode 100644 index 00000000000..f899f7efdce --- /dev/null +++ b/src/mesa/drivers/dri/r200/r200_blit.c @@ -0,0 +1,403 @@ +/* + * Copyright (C) 2009 Maciej Cencora <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "r200_context.h" +#include "r200_blit.h" + +static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, + int reg, int count) +{ + if (count) + return CP_PACKET0(reg, count - 1); + return CP_PACKET2; +} + +/* common formats supported as both textures and render targets */ +static unsigned is_blit_supported(gl_format mesa_format) +{ + /* XXX others? BE/LE? */ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_A8: + break; + default: + return 0; + } + + /* ??? 
*/ + if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0) + return 0; + + return 1; +} + +static inline void emit_vtx_state(struct r200_context *r200) +{ + BATCH_LOCALS(&r200->radeon); + + BEGIN_BATCH(14); + if (r200->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, 0); + } else { + OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS); + } + OUT_BATCH_REGVAL(R200_SE_VAP_CNTL, (R200_VAP_FORCE_W_TO_ONE | + (9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT))); + OUT_BATCH_REGVAL(R200_SE_VTX_STATE_CNTL, 0); + OUT_BATCH_REGVAL(R200_SE_VTE_CNTL, 0); + OUT_BATCH_REGVAL(R200_SE_VTX_FMT_0, R200_VTX_XY); + OUT_BATCH_REGVAL(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); + OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD | + RADEON_BFACE_SOLID | + RADEON_FFACE_SOLID | + RADEON_VTX_PIX_CENTER_OGL | + RADEON_ROUND_MODE_ROUND | + RADEON_ROUND_PREC_4TH_PIX)); + END_BATCH(); +} + +static void inline emit_tx_setup(struct r200_context *r200, + gl_format mesa_format, + struct radeon_bo *bo, + intptr_t offset, + unsigned width, + unsigned height, + unsigned pitch) +{ + uint32_t txformat = R200_TXFORMAT_NON_POWER2; + BATCH_LOCALS(&r200->radeon); + + assert(width <= 2047); + assert(height <= 2047); + assert(offset % 32 == 0); + + /* XXX others? BE/LE? 
*/ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_XRGB8888: + txformat |= R200_TXFORMAT_ARGB8888; + break; + case MESA_FORMAT_RGB565: + txformat |= R200_TXFORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_ARGB1555: + txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_A8: + txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP; + break; + default: + break; + } + + BEGIN_BATCH(28); + OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); + OUT_BATCH_REGVAL(R200_PP_CNTL_X, 0); + OUT_BATCH_REGVAL(R200_PP_TXMULTI_CTL_0, 0); + OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO | + R200_TXC_ARG_B_ZERO | + R200_TXC_ARG_C_R0_COLOR | + R200_TXC_OP_MADD)); + OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); + OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO | + R200_TXA_ARG_B_ZERO | + R200_TXA_ARG_C_R0_ALPHA | + R200_TXA_OP_MADD)); + OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); + OUT_BATCH_REGVAL(R200_PP_TXFILTER_0, (R200_CLAMP_S_CLAMP_LAST | + R200_CLAMP_T_CLAMP_LAST | + R200_MAG_FILTER_NEAREST | + R200_MIN_FILTER_NEAREST)); + OUT_BATCH_REGVAL(R200_PP_TXFORMAT_0, txformat); + OUT_BATCH_REGVAL(R200_PP_TXFORMAT_X_0, 0); + OUT_BATCH_REGVAL(R200_PP_TXSIZE_0, ((width - 1) | + ((height - 1) << RADEON_TEX_VSIZE_SHIFT))); + OUT_BATCH_REGVAL(R200_PP_TXPITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32); + + OUT_BATCH_REGSEQ(R200_PP_TXOFFSET_0, 1); + OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + + END_BATCH(); +} + +static inline void emit_cb_setup(struct r200_context *r200, + struct radeon_bo *bo, + intptr_t offset, + gl_format mesa_format, + unsigned pitch, + unsigned width, + unsigned 
height) +{ + uint32_t dst_pitch = pitch; + uint32_t dst_format = 0; + BATCH_LOCALS(&r200->radeon); + + /* XXX others? BE/LE? */ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + dst_format = RADEON_COLOR_FORMAT_ARGB8888; + break; + case MESA_FORMAT_RGB565: + dst_format = RADEON_COLOR_FORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + dst_format = RADEON_COLOR_FORMAT_ARGB4444; + break; + case MESA_FORMAT_ARGB1555: + dst_format = RADEON_COLOR_FORMAT_ARGB1555; + break; + case MESA_FORMAT_A8: + dst_format = RADEON_COLOR_FORMAT_RGB8; + break; + default: + break; + } + + BEGIN_BATCH_NO_AUTOSTATE(22); + OUT_BATCH_REGVAL(R200_RE_AUX_SCISSOR_CNTL, 0); + OUT_BATCH_REGVAL(R200_RE_CNTL, 0); + OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0); + OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) | + (height << RADEON_RE_HEIGHT_SHIFT))); + OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff); + OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); + OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format); + + OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1); + OUT_BATCH_RELOC(0, bo, 0, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1); + OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); + + END_BATCH(); +} + +static GLboolean validate_buffers(struct r200_context *r200, + struct radeon_bo *src_bo, + struct radeon_bo *dst_bo) +{ + int ret; + radeon_cs_space_add_persistent_bo(r200->radeon.cmdbuf.cs, + src_bo, RADEON_GEM_DOMAIN_VRAM, 0); + + radeon_cs_space_add_persistent_bo(r200->radeon.cmdbuf.cs, + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); + + ret = radeon_cs_space_check_with_bo(r200->radeon.cmdbuf.cs, + first_elem(&r200->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Calculate texcoords for given image region. 
+ * Output values are [minx, maxx, miny, maxy] + */ +static inline void calc_tex_coords(float img_width, float img_height, + float x, float y, + float reg_width, float reg_height, + unsigned flip_y, float *buf) +{ + buf[0] = x / img_width; + buf[1] = buf[0] + reg_width / img_width; + buf[2] = y / img_height; + buf[3] = buf[2] + reg_height / img_height; + if (flip_y) + { + buf[2] = 1.0 - buf[1]; + buf[3] = 1.0 - buf[3]; + } +} + +static inline void emit_draw_packet(struct r200_context *r200, + unsigned src_width, unsigned src_height, + unsigned src_x_offset, unsigned src_y_offset, + unsigned dst_x_offset, unsigned dst_y_offset, + unsigned reg_width, unsigned reg_height, + unsigned flip_y) +{ + float texcoords[4]; + float verts[12]; + BATCH_LOCALS(&r200->radeon); + + calc_tex_coords(src_width, src_height, + src_x_offset, src_y_offset, + reg_width, reg_height, + flip_y, texcoords); + + verts[0] = dst_x_offset; + verts[1] = dst_y_offset + reg_height; + verts[2] = texcoords[0]; + verts[3] = texcoords[3]; + + verts[4] = dst_x_offset + reg_width; + verts[5] = dst_y_offset + reg_height; + verts[6] = texcoords[1]; + verts[7] = texcoords[3]; + + verts[8] = dst_x_offset + reg_width; + verts[9] = dst_y_offset; + verts[10] = texcoords[1]; + verts[11] = texcoords[2]; + + BEGIN_BATCH(14); + OUT_BATCH(R200_CP_CMD_3D_DRAW_IMMD_2 | (12 << 16)); + OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING | + RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | + (3 << 16)); + OUT_BATCH_TABLE(verts, 12); + END_BATCH(); +} + +/** + * Copy a region of [@a width x @a height] pixels from source buffer + * to destination buffer. 
+ * @param[in] r200 r200 context + * @param[in] src_bo source radeon buffer object + * @param[in] src_offset offset of the source image in the @a src_bo + * @param[in] src_mesaformat source image format + * @param[in] src_pitch aligned source image width + * @param[in] src_width source image width + * @param[in] src_height source image height + * @param[in] src_x_offset x offset in the source image + * @param[in] src_y_offset y offset in the source image + * @param[in] dst_bo destination radeon buffer object + * @param[in] dst_offset offset of the destination image in the @a dst_bo + * @param[in] dst_mesaformat destination image format + * @param[in] dst_pitch aligned destination image width + * @param[in] dst_width destination image width + * @param[in] dst_height destination image height + * @param[in] dst_x_offset x offset in the destination image + * @param[in] dst_y_offset y offset in the destination image + * @param[in] width region width + * @param[in] height region height + * @param[in] flip_y set if y coords of the source image need to be flipped + */ +unsigned r200_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y) +{ + struct r200_context *r200 = R200_CONTEXT(ctx); + + if (!is_blit_supported(dst_mesaformat)) + return GL_FALSE; + + /* Make sure that colorbuffer has even width - hw limitation */ + if (dst_pitch % 2 > 0) + ++dst_pitch; + + /* Rendering to small buffer doesn't work. + * Looks like a hw limitation. 
+ */ + if (dst_pitch < 32) + return GL_FALSE; + + /* Need to clamp the region size to make sure + * we don't read outside of the source buffer + * or write outside of the destination buffer. + */ + if (reg_width + src_x_offset > src_width) + reg_width = src_width - src_x_offset; + if (reg_height + src_y_offset > src_height) + reg_height = src_height - src_y_offset; + if (reg_width + dst_x_offset > dst_width) + reg_width = dst_width - dst_x_offset; + if (reg_height + dst_y_offset > dst_height) + reg_height = dst_height - dst_y_offset; + + if (src_bo == dst_bo) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "src: size [%d x %d], pitch %d, " + "offset [%d x %d], format %s, bo %p\n", + src_width, src_height, src_pitch, + src_x_offset, src_y_offset, + _mesa_get_format_name(src_mesaformat), + src_bo); + fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n", + dst_pitch, dst_x_offset, dst_y_offset, + _mesa_get_format_name(dst_mesaformat), dst_bo); + fprintf(stderr, "region: %d x %d\n", reg_width, reg_height); + } + + /* Flush is needed to make sure that source buffer has correct data */ + radeonFlush(r200->radeon.glCtx); + + rcommonEnsureCmdBufSpace(&r200->radeon, 78, __FUNCTION__); + + if (!validate_buffers(r200, src_bo, dst_bo)) + return GL_FALSE; + + /* 14 */ + emit_vtx_state(r200); + /* 28 */ + emit_tx_setup(r200, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch); + /* 22 */ + emit_cb_setup(r200, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height); + /* 14 */ + emit_draw_packet(r200, src_width, src_height, + src_x_offset, src_y_offset, + dst_x_offset, dst_y_offset, + reg_width, reg_height, + flip_y); + + radeonFlush(ctx); + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r200/r200_blit.h b/src/mesa/drivers/dri/r200/r200_blit.h new file mode 100644 index 00000000000..38487266ae1 --- /dev/null +++ b/src/mesa/drivers/dri/r200/r200_blit.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2009 Maciej Cencora <[email 
protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef R200_BLIT_H +#define R200_BLIT_H + +void r200_blit_init(struct r200_context *r200); + +unsigned r200_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned width, + unsigned height, + unsigned flip_y); + +#endif // R200_BLIT_H diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index f34e319222d..3d6d0f5ec0d 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -61,6 +61,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_maos.h" #include "r200_vertprog.h" #include "radeon_queryobj.h" +#include "r200_blit.h" #include "radeon_span.h" @@ -268,6 +269,7 @@ static void r200_init_vtbl(radeonContextPtr radeon) radeon->vtbl.fallback = r200Fallback; radeon->vtbl.update_scissor = r200_vtbl_update_scissor; radeon->vtbl.emit_query_finish = r200_emit_query_finish; + radeon->vtbl.blit = r200_blit; } @@ -294,6 +296,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, if ( !rmesa ) return GL_FALSE; + rmesa->radeon.radeonScreen = screen; r200_init_vtbl(&rmesa->radeon); /* init exp fog table data */ r200InitStaticFogData(); @@ -326,10 +329,14 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, r200InitDriverFuncs(&functions); r200InitIoctlFuncs(&functions); r200InitStateFuncs(&functions); - r200InitTextureFuncs(&functions); + r200InitTextureFuncs(&rmesa->radeon, &functions); r200InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); + if (rmesa->radeon.radeonScreen->kernel_mm) { + r200_init_texcopy_functions(&functions); + } + if 
(!radeonInitContext(&rmesa->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index 17e4d8962ea..a9dce310ae1 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -645,6 +645,8 @@ extern GLboolean r200MakeCurrent( __DRIcontext *driContextPriv, __DRIdrawable *driReadPriv ); extern GLboolean r200UnbindContext( __DRIcontext *driContextPriv ); +extern void r200_init_texcopy_functions(struct dd_function_table *table); + /* ================================================================ * Debugging: */ diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index 5b87ba6ccdf..0916df64765 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -477,7 +477,7 @@ static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx, -void r200InitTextureFuncs( struct dd_function_table *functions ) +void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. 
@@ -511,6 +511,11 @@ void r200InitTextureFuncs( struct dd_function_table *functions ) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; functions->NewTextureImage = radeonNewTextureImage; diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h index e122de6e5ed..1a1e7038df6 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.h +++ b/src/mesa/drivers/dri/r200/r200_tex.h @@ -48,7 +48,7 @@ extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t ); -extern void r200InitTextureFuncs( struct dd_function_table *functions ); +extern void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ); extern void r200UpdateFragmentShader( GLcontext *ctx ); diff --git a/src/mesa/drivers/dri/r200/radeon_tex_copy.c b/src/mesa/drivers/dri/r200/radeon_tex_copy.c new file mode 120000 index 00000000000..dfa5ba34e65 --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_tex_copy.c @@ -0,0 +1 @@ +../radeon/radeon_tex_copy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index be005bd1641..0d0fbcc4081 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -39,7 +39,8 @@ RADEON_COMMON_SOURCES = \ radeon_mipmap_tree.c \ radeon_span.c \ radeon_queryobj.c \ - radeon_texture.c + radeon_texture.c \ + radeon_tex_copy.c DRIVER_SOURCES = \ radeon_screen.c \ @@ -50,7 +51,6 @@ DRIVER_SOURCES = \ r300_state.c \ r300_render.c \ r300_tex.c \ - r300_texcopy.c \ r300_texstate.c \ r300_vertprog.c \ r300_fragprog_common.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 731adc1af2b..f27f858652e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -23,6 +23,8 @@ #ifndef RADEON_COMPILER_H #define RADEON_COMPILER_H +#include "../../../../main/compiler.h" + #include "memory_pool.h" #include "radeon_code.h" #include "radeon_program.h" diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index 2eec27e900f..e24c7955d4b 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -150,8 +150,8 @@ static void r300_emit_tx_setup(struct r300_context *r300, (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT) | (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT) | R300_TX_MIN_FILTER_MIP_NONE | - R300_TX_MIN_FILTER_LINEAR | - R300_TX_MAG_FILTER_LINEAR | + R300_TX_MIN_FILTER_NEAREST | + R300_TX_MAG_FILTER_NEAREST | (0 << 28)); OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0); OUT_BATCH_REGVAL(R300_TX_SIZE_0, @@ -403,9 +403,8 @@ static void calc_tex_coords(float img_width, float img_height, buf[3] = buf[2] + reg_height / img_height; if (flip_y) { - float tmp = buf[2]; - buf[2] = 1.0 - buf[3]; - buf[3] = 1.0 - tmp; + buf[2] = 1.0 - buf[2]; + buf[3] = 1.0 - buf[3]; } } @@ -424,13 +423,13 @@ static void 
emit_draw_packet(struct r300_context *r300, flip_y, texcoords); float verts[] = { dst_x_offset, dst_y_offset, - texcoords[0], texcoords[3], - dst_x_offset, dst_y_offset + reg_height, texcoords[0], texcoords[2], + dst_x_offset, dst_y_offset + reg_height, + texcoords[0], texcoords[3], dst_x_offset + reg_width, dst_y_offset + reg_height, - texcoords[1], texcoords[2], + texcoords[1], texcoords[3], dst_x_offset + reg_width, dst_y_offset, - texcoords[1], texcoords[3] }; + texcoords[1], texcoords[2] }; BATCH_LOCALS(&r300->radeon); @@ -495,6 +494,27 @@ static void emit_cb_setup(struct r300_context *r300, END_BATCH(); } +static unsigned is_blit_supported(gl_format dst_format) +{ + switch (dst_format) { + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_XRGB8888: + break; + default: + return 0; + } + + if (_mesa_get_format_bits(dst_format, GL_DEPTH_BITS) > 0) + return 0; + + return 1; +} + /** * Copy a region of [@a width x @a height] pixels from source buffer * to destination buffer. 
@@ -519,29 +539,31 @@ static void emit_cb_setup(struct r300_context *r300, * @param[in] height region height * @param[in] flip_y set if y coords of the source image need to be flipped */ -GLboolean r300_blit(struct r300_context *r300, - struct radeon_bo *src_bo, - intptr_t src_offset, - gl_format src_mesaformat, - unsigned src_pitch, - unsigned src_width, - unsigned src_height, - unsigned src_x_offset, - unsigned src_y_offset, - struct radeon_bo *dst_bo, - intptr_t dst_offset, - gl_format dst_mesaformat, - unsigned dst_pitch, - unsigned dst_width, - unsigned dst_height, - unsigned dst_x_offset, - unsigned dst_y_offset, - unsigned reg_width, - unsigned reg_height, - unsigned flip_y) +unsigned r300_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y) { - if (_mesa_get_format_bits(src_mesaformat, GL_DEPTH_BITS) > 0) - return GL_FALSE; + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (!is_blit_supported(dst_mesaformat)) + return 0; /* Make sure that colorbuffer has even width - hw limitation */ if (dst_pitch % 2 > 0) @@ -551,7 +573,7 @@ GLboolean r300_blit(struct r300_context *r300, * Looks like a hw limitation. 
*/ if (dst_pitch < 32) - return GL_FALSE; + return 0; /* Need to clamp the region size to make sure * we don't read outside of the source buffer @@ -567,6 +589,10 @@ GLboolean r300_blit(struct r300_context *r300, reg_height = dst_height - dst_y_offset; if (src_bo == dst_bo) { + return 0; + } + + if (src_offset % 32 || dst_offset % 32) { return GL_FALSE; } @@ -587,7 +613,7 @@ GLboolean r300_blit(struct r300_context *r300, radeonFlush(r300->radeon.glCtx); if (!validate_buffers(r300, src_bo, dst_bo)) - return GL_FALSE; + return 0; rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__); @@ -618,5 +644,5 @@ GLboolean r300_blit(struct r300_context *r300, radeonFlush(r300->radeon.glCtx); - return GL_TRUE; -}
\ No newline at end of file + return 1; +} diff --git a/src/mesa/drivers/dri/r300/r300_blit.h b/src/mesa/drivers/dri/r300/r300_blit.h index dc21e880982..735acaddd70 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.h +++ b/src/mesa/drivers/dri/r300/r300_blit.h @@ -30,25 +30,25 @@ void r300_blit_init(struct r300_context *r300); -GLboolean r300_blit(struct r300_context *r300, - struct radeon_bo *src_bo, - intptr_t src_offset, - gl_format src_mesaformat, - unsigned src_pitch, - unsigned src_width, - unsigned src_height, - unsigned src_x_offset, - unsigned src_y_offset, - struct radeon_bo *dst_bo, - intptr_t dst_offset, - gl_format dst_mesaformat, - unsigned dst_pitch, - unsigned dst_width, - unsigned dst_height, - unsigned dst_x_offset, - unsigned dst_y_offset, - unsigned width, - unsigned height, - unsigned flip_y); +unsigned r300_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y); #endif // R300_BLIT_H
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 1f6ccf6ddca..bb0e6db3139 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -93,8 +93,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/remap_helper.h" -void r300_init_texcopy_functions(struct dd_function_table *table); - static const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, @@ -326,6 +324,8 @@ static void r300_init_vtbl(radeonContextPtr radeon) radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z; } else radeon->vtbl.emit_query_finish = r300_emit_query_finish; + + radeon->vtbl.blit = r300_blit; } static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) @@ -488,15 +488,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _mesa_init_driver_functions(&functions); r300InitIoctlFuncs(&functions); r300InitStateFuncs(&functions); - r300InitTextureFuncs(&functions); + r300InitTextureFuncs(&r300->radeon, &functions); r300InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); radeonInitBufferObjectFuncs(&functions); - if (r300->radeon.radeonScreen->kernel_mm) { - r300_init_texcopy_functions(&functions); - } - if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 546cd8ddde3..78ab43a99f9 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -554,8 +554,6 @@ extern void r300InitShaderFunctions(r300ContextPtr r300); extern void r300InitDraw(GLcontext *ctx); -extern void r300_init_texcopy_functions(struct dd_function_table *table); - #define r300PackFloat32 radeonPackFloat32 #define r300PackFloat24 radeonPackFloat24 diff --git 
a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 963f648cb14..eb5d2d50041 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -312,7 +312,7 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, return &t->base; } -void r300InitTextureFuncs(struct dd_function_table *functions) +void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. @@ -340,6 +340,11 @@ void r300InitTextureFuncs(struct dd_function_table *functions) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; driInitTextureFormats(); diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h index 6ede0fe25c9..9694e703b83 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.h +++ b/src/mesa/drivers/dri/r300/r300_tex.h @@ -49,7 +49,7 @@ extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, extern GLboolean r300ValidateBuffers(GLcontext * ctx); -extern void r300InitTextureFuncs(struct dd_function_table *functions); +extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions); int32_t r300TranslateTexFormat(gl_format mesaFormat); diff --git a/src/mesa/drivers/dri/r300/radeon_tex_copy.c b/src/mesa/drivers/dri/r300/radeon_tex_copy.c new file mode 120000 index 00000000000..dfa5ba34e65 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_tex_copy.c @@ -0,0 +1 @@ +../radeon/radeon_tex_copy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 26f47b72687..e55d0babd85 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -39,7 +39,8 @@ RADEON_COMMON_SOURCES = \ radeon_mipmap_tree.c \ radeon_span.c \ radeon_texture.c \ - radeon_queryobj.c + radeon_queryobj.c \ + radeon_tex_copy.c DRIVER_SOURCES = \ radeon_screen.c \ @@ -59,6 +60,7 @@ DRIVER_SOURCES = \ r700_render.c \ r600_tex.c \ r600_texstate.c \ + r600_blit.c \ r700_debug.c \ $(RADEON_COMMON_SOURCES) \ $(EGL_SOURCES) \ diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c new file mode 100644 index 00000000000..d7cd59ade62 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_blit.c @@ -0,0 +1,1660 @@ +/* + * Copyright (C) 2009 Advanced Micro Devices, Inc. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "r600_context.h" + +#include "r600_blit.h" +#include "r600_blit_shaders.h" +#include "r600_cmdbuf.h" + +/* common formats supported as both textures and render targets */ +static unsigned is_blit_supported(gl_format mesa_format) +{ + switch (mesa_format) { + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_SIGNED_RGBA8888: + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_SIGNED_RGBA8888_REV: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_XRGB8888_REV: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_RGB565_REV: + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB4444_REV: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_ARGB1555_REV: + case MESA_FORMAT_AL88: + case MESA_FORMAT_AL88_REV: + case MESA_FORMAT_RGB332: + case MESA_FORMAT_A8: + case MESA_FORMAT_I8: + case MESA_FORMAT_CI8: + case MESA_FORMAT_L8: + case MESA_FORMAT_RGBA_FLOAT32: + case MESA_FORMAT_RGBA_FLOAT16: + case MESA_FORMAT_ALPHA_FLOAT32: + case MESA_FORMAT_ALPHA_FLOAT16: + case MESA_FORMAT_LUMINANCE_FLOAT32: + case MESA_FORMAT_LUMINANCE_FLOAT16: + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */ + case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */ + case MESA_FORMAT_X8_Z24: + case MESA_FORMAT_S8_Z24: + case MESA_FORMAT_Z24_S8: + case MESA_FORMAT_Z16: + case MESA_FORMAT_Z32: + case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SLA8: + case MESA_FORMAT_SL8: + break; + default: + return 0; + } + + /* ??? 
*/ + /* not sure blit to depth works or not yet */ + if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0) + return 0; + + return 1; +} + +static inline void +set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_format, + int nPitchInPixel, int w, int h, intptr_t dst_offset) +{ + uint32_t cb_color0_base, cb_color0_size = 0, cb_color0_info = 0, cb_color0_view = 0; + int id = 0; + uint32_t comp_swap, format; + BATCH_LOCALS(&context->radeon); + + cb_color0_base = dst_offset / 256; + + SETfield(cb_color0_size, (nPitchInPixel / 8) - 1, + PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); + SETfield(cb_color0_size, ((nPitchInPixel * h) / 64) - 1, + SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); + + SETfield(cb_color0_info, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask); + SETfield(cb_color0_info, ARRAY_LINEAR_GENERAL, + CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); + + SETbit(cb_color0_info, BLEND_BYPASS_bit); + + switch(mesa_format) { + case MESA_FORMAT_RGBA8888: + format = COLOR_8_8_8_8; + comp_swap = SWAP_STD_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_SIGNED_RGBA8888: + format = COLOR_8_8_8_8; + comp_swap = SWAP_STD_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_RGBA8888_REV: + format = COLOR_8_8_8_8; + comp_swap = SWAP_STD; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_SIGNED_RGBA8888_REV: + format = COLOR_8_8_8_8; + comp_swap = SWAP_STD; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + format = COLOR_8_8_8_8; + comp_swap = SWAP_ALT; + SETbit(cb_color0_info, 
SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_XRGB8888_REV: + format = COLOR_8_8_8_8; + comp_swap = SWAP_ALT_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_RGB565: + format = COLOR_5_6_5; + comp_swap = SWAP_STD_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_RGB565_REV: + format = COLOR_5_6_5; + comp_swap = SWAP_STD; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_ARGB4444: + format = COLOR_4_4_4_4; + comp_swap = SWAP_ALT; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_ARGB4444_REV: + format = COLOR_4_4_4_4; + comp_swap = SWAP_ALT_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_ARGB1555: + format = COLOR_1_5_5_5; + comp_swap = SWAP_ALT; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_ARGB1555_REV: + format = COLOR_1_5_5_5; + comp_swap = SWAP_ALT_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_AL88: + format = COLOR_8_8; + comp_swap = SWAP_STD; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_AL88_REV: + format = COLOR_8_8; + comp_swap = SWAP_STD_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + 
SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_RGB332: + format = COLOR_3_3_2; + comp_swap = SWAP_STD_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_A8: + format = COLOR_8; + comp_swap = SWAP_ALT_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_I8: + case MESA_FORMAT_CI8: + format = COLOR_8; + comp_swap = SWAP_STD; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_L8: + format = COLOR_8; + comp_swap = SWAP_ALT; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_RGBA_FLOAT32: + format = COLOR_32_32_32_32_FLOAT; + comp_swap = SWAP_STD_REV; + SETbit(cb_color0_info, BLEND_FLOAT32_bit); + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_RGBA_FLOAT16: + format = COLOR_16_16_16_16_FLOAT; + comp_swap = SWAP_STD_REV; + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT32: + format = COLOR_32_FLOAT; + comp_swap = SWAP_ALT_REV; + SETbit(cb_color0_info, BLEND_FLOAT32_bit); + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT16: + format = COLOR_16_FLOAT; + comp_swap = SWAP_ALT_REV; + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT32: + format = COLOR_32_FLOAT; + 
comp_swap = SWAP_ALT; + SETbit(cb_color0_info, BLEND_FLOAT32_bit); + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT16: + format = COLOR_16_FLOAT; + comp_swap = SWAP_ALT; + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + format = COLOR_32_32_FLOAT; + comp_swap = SWAP_ALT_REV; + SETbit(cb_color0_info, BLEND_FLOAT32_bit); + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + format = COLOR_16_16_FLOAT; + comp_swap = SWAP_ALT_REV; + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */ + format = COLOR_32_FLOAT; + comp_swap = SWAP_STD; + SETbit(cb_color0_info, BLEND_FLOAT32_bit); + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */ + format = COLOR_16_FLOAT; + comp_swap = SWAP_STD; + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_X8_Z24: + case MESA_FORMAT_S8_Z24: + format = COLOR_8_24; + comp_swap = SWAP_STD; + SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1, + CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_Z24_S8: + format = COLOR_24_8; + comp_swap = SWAP_STD; + SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1, + CB_COLOR0_INFO__ARRAY_MODE_shift, 
CB_COLOR0_INFO__ARRAY_MODE_mask); + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_Z16: + format = COLOR_16; + comp_swap = SWAP_STD; + SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1, + CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_Z32: + format = COLOR_32; + comp_swap = SWAP_STD; + SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1, + CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); + CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_SRGBA8: + format = COLOR_8_8_8_8; + comp_swap = SWAP_STD_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_SLA8: + format = COLOR_8_8; + comp_swap = SWAP_ALT_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + case MESA_FORMAT_SL8: + format = COLOR_8; + comp_swap = SWAP_ALT_REV; + SETbit(cb_color0_info, SOURCE_FORMAT_bit); + SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + break; + default: + fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format)); + assert("Invalid format for US output\n"); + return; + } + + SETfield(cb_color0_info, format, CB_COLOR0_INFO__FORMAT_shift, + CB_COLOR0_INFO__FORMAT_mask); + SETfield(cb_color0_info, comp_swap, COMP_SWAP_shift, COMP_SWAP_mask); + + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1); + R600_OUT_BATCH(cb_color0_base); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); 
+ + if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) && + (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) { + BEGIN_BATCH_NO_AUTOSTATE(2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0)); + R600_OUT_BATCH((2 << id)); + END_BATCH(); + } + + /* Set CMASK & TILE buffer to the offset of color buffer as + * we don't use those this shouldn't cause any issue and we + * then have a valid cmd stream + */ + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1); + R600_OUT_BATCH(cb_color0_base); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1); + R600_OUT_BATCH(cb_color0_base); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(12); + R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size); + R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view); + R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info); + R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0); + END_BATCH(); + + COMMIT_BATCH(); + +} + +static inline void load_shaders(GLcontext * ctx) +{ + + radeonContextPtr radeonctx = RADEON_CONTEXT(ctx); + context_t *context = R700_CONTEXT(ctx); + int i, size; + uint32_t *shader; + + if (context->blit_bo_loaded == 1) + return; + + size = 4096; + context->blit_bo = radeon_bo_open(radeonctx->radeonScreen->bom, 0, + size, 256, RADEON_GEM_DOMAIN_GTT, 0); + radeon_bo_map(context->blit_bo, 1); + shader = context->blit_bo->ptr; + + for(i=0; i<sizeof(r6xx_vs)/4; i++) { + shader[128+i] = r6xx_vs[i]; + } + for(i=0; i<sizeof(r6xx_ps)/4; i++) { + shader[256+i] = r6xx_ps[i]; + } + + radeon_bo_unmap(context->blit_bo); + context->blit_bo_loaded = 1; + +} + +static inline void +set_shaders(context_t *context) +{ + struct radeon_bo * pbo = 
context->blit_bo; + BATCH_LOCALS(&context->radeon); + + uint32_t sq_pgm_start_fs = (512 >> 8); + uint32_t sq_pgm_resources_fs = 0; + uint32_t sq_pgm_cf_offset_fs = 0; + + uint32_t sq_pgm_start_vs = (512 >> 8); + uint32_t sq_pgm_resources_vs = (1 << NUM_GPRS_shift); + uint32_t sq_pgm_cf_offset_vs = 0; + + uint32_t sq_pgm_start_ps = (1024 >> 8); + uint32_t sq_pgm_resources_ps = (1 << NUM_GPRS_shift); + uint32_t sq_pgm_cf_offset_ps = 0; + uint32_t sq_pgm_exports_ps = (1 << 1); + + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + + /* FS */ + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1); + R600_OUT_BATCH(sq_pgm_start_fs); + R600_OUT_BATCH_RELOC(sq_pgm_start_fs, + pbo, + sq_pgm_start_fs, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, sq_pgm_resources_fs); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, sq_pgm_cf_offset_fs); + END_BATCH(); + + /* VS */ + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); + R600_OUT_BATCH(sq_pgm_start_vs); + R600_OUT_BATCH_RELOC(sq_pgm_start_vs, + pbo, + sq_pgm_start_vs, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, sq_pgm_resources_vs); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, sq_pgm_cf_offset_vs); + END_BATCH(); + + /* PS */ + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); + R600_OUT_BATCH(sq_pgm_start_ps); + R600_OUT_BATCH_RELOC(sq_pgm_start_ps, + pbo, + sq_pgm_start_ps, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(9); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, sq_pgm_resources_ps); + R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, sq_pgm_exports_ps); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, sq_pgm_cf_offset_ps); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(18); + R600_OUT_BATCH_REGVAL(SPI_VS_OUT_CONFIG, 0); //EXPORT_COUNT is - 1 + 
R600_OUT_BATCH_REGVAL(SPI_VS_OUT_ID_0, 0); + R600_OUT_BATCH_REGVAL(SPI_PS_INPUT_CNTL_0, SEL_CENTROID_bit); + R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift)); + R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_1, 0); + R600_OUT_BATCH_REGVAL(SPI_INTERP_CONTROL_0, 0); + END_BATCH(); + + COMMIT_BATCH(); + +} + +static inline void +set_vtx_resource(context_t *context) +{ + struct radeon_bo *bo = context->blit_bo; + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(0); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); + R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(0); + END_BATCH(); + COMMIT_BATCH(); + + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) + r700SyncSurf(context, bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); + else + r700SyncSurf(context, bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 2); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH(SQ_FETCH_RESOURCE_VS_OFFSET * FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH(0); + R600_OUT_BATCH(48 - 1); + R600_OUT_BATCH(16 << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift); + R600_OUT_BATCH(1 << MEM_REQUEST_SIZE_shift); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift); + R600_OUT_BATCH_RELOC(SQ_VTX_CONSTANT_WORD0_0, + bo, + SQ_VTX_CONSTANT_WORD0_0, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + COMMIT_BATCH(); + +} + +static inline void +set_tex_resource(context_t * context, + gl_format 
mesa_format, struct radeon_bo *bo, int w, int h, + int TexelPitch, intptr_t src_offset) +{ + uint32_t sq_tex_resource0, sq_tex_resource1, sq_tex_resource2, sq_tex_resource4, sq_tex_resource6; + + sq_tex_resource0 = sq_tex_resource1 = sq_tex_resource2 = sq_tex_resource4 = sq_tex_resource6 = 0; + BATCH_LOCALS(&context->radeon); + + SETfield(sq_tex_resource0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask); + SETfield(sq_tex_resource0, ARRAY_LINEAR_GENERAL, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + + switch (mesa_format) { + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_SIGNED_RGBA8888: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) { + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } + break; + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_SIGNED_RGBA8888_REV: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) { + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } + break; + case MESA_FORMAT_ARGB8888: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_XRGB8888: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB8888_REV: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_XRGB8888_REV: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB565: + SETfield(sq_tex_resource1, FMT_5_6_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + 
SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB565_REV: + SETfield(sq_tex_resource1, FMT_5_6_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB4444: + SETfield(sq_tex_resource1, FMT_4_4_4_4, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB4444_REV: + SETfield(sq_tex_resource1, FMT_4_4_4_4, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB1555: + SETfield(sq_tex_resource1, FMT_1_5_5_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB1555_REV: + SETfield(sq_tex_resource1, FMT_1_5_5_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_AL88: + case MESA_FORMAT_AL88_REV: /* TODO : Check this. 
*/ + SETfield(sq_tex_resource1, FMT_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB332: + SETfield(sq_tex_resource1, FMT_3_3_2, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */ + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + 
case MESA_FORMAT_L8: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_I8: /* X, X, X, X */ + case MESA_FORMAT_CI8: + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGBA_FLOAT32: + SETfield(sq_tex_resource1, FMT_32_32_32_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGBA_FLOAT16: + SETfield(sq_tex_resource1, FMT_16_16_16_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */ + SETfield(sq_tex_resource1, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */ + SETfield(sq_tex_resource1, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + SETfield(sq_tex_resource1, FMT_32_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + SETfield(sq_tex_resource1, FMT_16_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */ + SETfield(sq_tex_resource1, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */ + SETfield(sq_tex_resource1, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z16: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_16, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_X8_Z24: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_8_24, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_S8_Z24: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_8_24, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z24_S8: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_24_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z32: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + 
SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_32, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_S8: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_SRGBA8: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + 
SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SLA8: + SETfield(sq_tex_resource1, FMT_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SL8: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + default: + fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format)); + assert("Invalid format for US output\n"); + return; + }; + + 
SETfield(sq_tex_resource0, (TexelPitch/8)-1, PITCH_shift, PITCH_mask); + SETfield(sq_tex_resource0, w - 1, TEX_WIDTH_shift, TEX_WIDTH_mask); + SETfield(sq_tex_resource1, h - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask); + + sq_tex_resource2 = src_offset / 256; + + SETfield(sq_tex_resource6, SQ_TEX_VTX_VALID_TEXTURE, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, + SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); + + r700SyncSurf(context, bo, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, + 0, TC_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 4); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH(0 * 7); + + R600_OUT_BATCH(sq_tex_resource0); + R600_OUT_BATCH(sq_tex_resource1); + R600_OUT_BATCH(sq_tex_resource2); + R600_OUT_BATCH(0); //SQ_TEX_RESOURCE3 + R600_OUT_BATCH(sq_tex_resource4); + R600_OUT_BATCH(0); //SQ_TEX_RESOURCE5 + R600_OUT_BATCH(sq_tex_resource6); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + COMMIT_BATCH(); +} + +static inline void +set_tex_sampler(context_t * context) +{ + uint32_t sq_tex_sampler_word0 = 0, sq_tex_sampler_word1 = 0, sq_tex_sampler_word2 = 0; + int i = 0; + + SETbit(sq_tex_sampler_word2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit); + + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(5); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); + R600_OUT_BATCH(i * 3); + R600_OUT_BATCH(sq_tex_sampler_word0); + R600_OUT_BATCH(sq_tex_sampler_word1); + R600_OUT_BATCH(sq_tex_sampler_word2); + END_BATCH(); + +} + +static inline void +set_scissors(context_t *context, int x1, int y1, int x2, int y2) +{ + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(17); + R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2); + R600_OUT_BATCH((x1 << 0) | (y1 << 16)); + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + + R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 3); + R600_OUT_BATCH(0); 
//PA_SC_WINDOW_OFFSET + R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); //PA_SC_WINDOW_SCISSOR_TL + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + + R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2); + R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + + /* XXX 16 of these PA_SC_VPORT_SCISSOR_0_TL_num ... */ + R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL, 2 ); + R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + END_BATCH(); + + COMMIT_BATCH(); + +} + +static inline void +set_vb_data(context_t * context, int src_x, int src_y, int dst_x, int dst_y, + int w, int h, int src_h, unsigned flip_y) +{ + float *vb; + radeon_bo_map(context->blit_bo, 1); + vb = context->blit_bo->ptr; + + vb[0] = (float)(dst_x); + vb[1] = (float)(dst_y); + vb[2] = (float)(src_x); + vb[3] = (flip_y) ? (float)(src_h - src_y) : (float)src_y; + + vb[4] = (float)(dst_x); + vb[5] = (float)(dst_y + h); + vb[6] = (float)(src_x); + vb[7] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h); + + vb[8] = (float)(dst_x + w); + vb[9] = (float)(dst_y + h); + vb[10] = (float)(src_x + w); + vb[11] = (flip_y) ? 
(float)(src_h - (src_y + h)) : (float)(src_y + h); + + radeon_bo_unmap(context->blit_bo); + +} + +static inline void +draw_auto(context_t *context) +{ + BATCH_LOCALS(&context->radeon); + uint32_t vgt_primitive_type = 0, vgt_index_type = 0, vgt_draw_initiator = 0, vgt_num_indices; + + SETfield(vgt_primitive_type, DI_PT_RECTLIST, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, + INDEX_TYPE_mask); + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, + MAJOR_MODE_mask); + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, + SOURCE_SELECT_mask); + + vgt_num_indices = 3; + + BEGIN_BATCH_NO_AUTOSTATE(10); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + // + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + + END_BATCH(); + COMMIT_BATCH(); +} + +static inline void +set_default_state(context_t *context) +{ + int ps_prio = 0; + int vs_prio = 1; + int gs_prio = 2; + int es_prio = 3; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int num_gs_threads; + int num_es_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; + uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + uint32_t ta_cntl_aux, db_watermarks, sq_dyn_gpr_cntl_ps_flush_req, db_debug; + BATCH_LOCALS(&context->radeon); + + switch (context->radeon.radeonScreen->chip_family) { + case CHIP_FAMILY_R600: + 
num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_FAMILY_RV630: + case CHIP_FAMILY_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_FAMILY_RV610: + case CHIP_FAMILY_RV620: + case CHIP_FAMILY_RS780: + case CHIP_FAMILY_RS880: + default: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_FAMILY_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_FAMILY_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_FAMILY_RV730: + case CHIP_FAMILY_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + 
num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_FAMILY_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + + sq_config = 0; + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) + CLEARbit(sq_config, VC_ENABLE_bit); + else + SETbit(sq_config, VC_ENABLE_bit); + SETbit(sq_config, DX9_CONSTS_bit); + SETbit(sq_config, ALU_INST_PREFER_VECTOR_bit); + SETfield(sq_config, ps_prio, PS_PRIO_shift, PS_PRIO_mask); + SETfield(sq_config, vs_prio, VS_PRIO_shift, VS_PRIO_mask); + SETfield(sq_config, gs_prio, GS_PRIO_shift, GS_PRIO_mask); + SETfield(sq_config, es_prio, ES_PRIO_shift, ES_PRIO_mask); + + sq_gpr_resource_mgmt_1 = 0; + SETfield(sq_gpr_resource_mgmt_1, num_ps_gprs, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask); + SETfield(sq_gpr_resource_mgmt_1, num_vs_gprs, NUM_VS_GPRS_shift, NUM_VS_GPRS_mask); + SETfield(sq_gpr_resource_mgmt_1, num_temp_gprs, + NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask); + + sq_gpr_resource_mgmt_2 = 0; + SETfield(sq_gpr_resource_mgmt_2, num_gs_gprs, NUM_GS_GPRS_shift, NUM_GS_GPRS_mask); + SETfield(sq_gpr_resource_mgmt_2, num_es_gprs, NUM_ES_GPRS_shift, NUM_ES_GPRS_mask); + + sq_thread_resource_mgmt = 0; + SETfield(sq_thread_resource_mgmt, num_ps_threads, + NUM_PS_THREADS_shift, NUM_PS_THREADS_mask); + SETfield(sq_thread_resource_mgmt, num_vs_threads, + 
NUM_VS_THREADS_shift, NUM_VS_THREADS_mask); + SETfield(sq_thread_resource_mgmt, num_gs_threads, + NUM_GS_THREADS_shift, NUM_GS_THREADS_mask); + SETfield(sq_thread_resource_mgmt, num_es_threads, + NUM_ES_THREADS_shift, NUM_ES_THREADS_mask); + + sq_stack_resource_mgmt_1 = 0; + SETfield(sq_stack_resource_mgmt_1, num_ps_stack_entries, + NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask); + SETfield(sq_stack_resource_mgmt_1, num_vs_stack_entries, + NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask); + + sq_stack_resource_mgmt_2 = 0; + SETfield(sq_stack_resource_mgmt_2, num_gs_stack_entries, + NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask); + SETfield(sq_stack_resource_mgmt_2, num_es_stack_entries, + NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask); + + ta_cntl_aux = 0; + SETfield(ta_cntl_aux, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask); + db_watermarks = 0; + SETfield(db_watermarks, 4, DEPTH_FREE_shift, DEPTH_FREE_mask); + SETfield(db_watermarks, 16, DEPTH_FLUSH_shift, DEPTH_FLUSH_mask); + SETfield(db_watermarks, 0, FORCE_SUMMARIZE_shift, FORCE_SUMMARIZE_mask); + SETfield(db_watermarks, 4, DEPTH_PENDING_FREE_shift, DEPTH_PENDING_FREE_mask); + sq_dyn_gpr_cntl_ps_flush_req = 0; + db_debug = 0; + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { + SETfield(ta_cntl_aux, 3, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask); + db_debug = 0x82000000; + SETfield(db_watermarks, 16, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask); + } else { + SETfield(ta_cntl_aux, 2, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask); + SETfield(db_watermarks, 4, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask); + SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit); + } + + BEGIN_BATCH_NO_AUTOSTATE(117); + R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6); + R600_OUT_BATCH(sq_config); + R600_OUT_BATCH(sq_gpr_resource_mgmt_1); + R600_OUT_BATCH(sq_gpr_resource_mgmt_2); + R600_OUT_BATCH(sq_thread_resource_mgmt); + R600_OUT_BATCH(sq_stack_resource_mgmt_1); + 
R600_OUT_BATCH(sq_stack_resource_mgmt_2); + + R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, ta_cntl_aux); + R600_OUT_BATCH_REGVAL(VC_ENHANCE, 0); + R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, sq_dyn_gpr_cntl_ps_flush_req); + R600_OUT_BATCH_REGVAL(DB_DEBUG, db_debug); + R600_OUT_BATCH_REGVAL(DB_WATERMARKS, db_watermarks); + + R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + + R600_OUT_BATCH_REGVAL(CB_CLRCMP_CONTROL, + (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift)); + R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0); + R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0); + R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0); + R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0); + R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask)); + R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask)); + R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, (0xcc << ROP3_shift)); + + R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, 0); + R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, (FACE_bit) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)); + R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) | + (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | + (X_1_256TH << QUANT_MODE_shift)); + + R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4); + R600_OUT_BATCH(2048); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + + R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + 
R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + + R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, 0); + R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, 0); + R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, 0); + R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, 0); + + R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + + R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0); + + END_BATCH(); + COMMIT_BATCH(); +} + +static GLboolean validate_buffers(context_t *rmesa, + struct radeon_bo *src_bo, + struct radeon_bo *dst_bo) +{ + int ret; + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + src_bo, RADEON_GEM_DOMAIN_VRAM, 0); + + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); + + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + rmesa->blit_bo, RADEON_GEM_DOMAIN_GTT, 0); + + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, + rmesa->blit_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, + first_elem(&rmesa->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + return GL_TRUE; +} + +unsigned r600_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x, + unsigned src_y, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x, + unsigned dst_y, + unsigned w, + unsigned h, + unsigned flip_y) +{ + context_t *context = R700_CONTEXT(ctx); + int id = 0; + + if (!is_blit_supported(dst_mesaformat)) + return GL_FALSE; + + if (src_bo == dst_bo) { + return GL_FALSE; + } + + if (src_offset % 256 || dst_offset % 256) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "src: 
width %d, height %d, pitch %d vs %d, format %s\n", + src_width, src_height, src_pitch, + _mesa_format_row_stride(src_mesaformat, src_width), + _mesa_get_format_name(src_mesaformat)); + fprintf(stderr, "dst: width %d, height %d, pitch %d, format %s\n", + dst_width, dst_height, + _mesa_format_row_stride(dst_mesaformat, dst_width), + _mesa_get_format_name(dst_mesaformat)); + } + + /* Flush is needed to make sure that source buffer has correct data */ + radeonFlush(ctx); + + rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__); + + /* load shaders */ + load_shaders(context->radeon.glCtx); + + if (!validate_buffers(context, src_bo, dst_bo)) + return GL_FALSE; + + /* set clear state */ + /* 117 */ + set_default_state(context); + + /* shaders */ + /* 72 */ + set_shaders(context); + + /* src */ + /* 20 */ + set_tex_resource(context, src_mesaformat, src_bo, + src_width, src_height, src_pitch, src_offset); + + /* 5 */ + set_tex_sampler(context); + + /* dst */ + /* 27 */ + set_render_target(context, dst_bo, dst_mesaformat, + dst_pitch, dst_width, dst_height, dst_offset); + /* scissors */ + /* 17 */ + set_scissors(context, dst_x, dst_y, dst_x + dst_width, dst_y + dst_height); + + set_vb_data(context, src_x, src_y, dst_x, dst_y, w, h, src_height, flip_y); + /* Vertex buffer setup */ + /* 24 */ + set_vtx_resource(context); + + /* draw */ + /* 10 */ + draw_auto(context); + + /* 7 */ + r700SyncSurf(context, dst_bo, 0, + RADEON_GEM_DOMAIN_VRAM|RADEON_GEM_DOMAIN_GTT, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + /* 5 */ + r700WaitForIdleClean(context); + + radeonFlush(ctx); + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r600/r600_blit.h b/src/mesa/drivers/dri/r600/r600_blit.h new file mode 100644 index 00000000000..f280e23489e --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_blit.h @@ -0,0 +1,21 @@ +unsigned r600_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + 
unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned w, + unsigned h, + unsigned flip_y); + diff --git a/src/mesa/drivers/dri/r600/r600_blit_shaders.h b/src/mesa/drivers/dri/r600/r600_blit_shaders.h new file mode 100644 index 00000000000..492dde96368 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_blit_shaders.h @@ -0,0 +1,28 @@ +const uint32_t r6xx_vs[] = +{ + 0x00000004, // CF_DWORD0(ADDR(4)) + 0x81000000, // SQ_CF_INST_VTX COUNT(1) + 0x0000203c, // CF_EXP_IMP CF_POS0 SQ_EXPORT_POS RW_GPR(0) ELEM_SIZE(0) + 0x94000b08, // SQ_CF_INST_EXPORT_DONE SWZ XY01 BARRIER(1) + 0x00004000, // CF_EXP_IMP 0 SQ_EXPORT_PARAM RW_GPR(0) ELEM_SIZE(0) + 0x14200b1a, // SQ_CF_INST_EXPORT_DONE SWZ ZW01 EOP(1) BARRIER(0) + 0x00000000, + 0x00000000, + 0x3c000000, // SQ_VTX_INST_FETCH BUFFER_ID(0) MEGA_FETCH_COUNT(16) + 0x68cd1000, // DST_GPR(0) DST_SWZ: XYZW DATA_FORMAT(35) SQ_NUM_FORMAT_SCALED SQ_FORMAT_COMP_SIGNED + 0x00080000, // ENDIAN_SWAP(SQ_ENDIAN_NONE) MEGA_FETCH(1) + 0x00000000, // VTX_DWORD_PAD +}; + +const uint32_t r6xx_ps[] = +{ + 0x00000002, // CF_DWORD0 AADR(2) + 0x80800000, // SQ_CF_INST_TEX COUNT(1) + 0x00000000, // CF_ALLOC_IMP_EXP0 SQ_EXPORT_PIXEL RW_GPR(0) ELEM_SIZE(0) + 0x94200688, // SQ_CF_INST_EXPORT_DONE EOP(1) BARRIER(1) SWZ: XYZW + 0x00000010, // SQ_TEX_INST_SAMPLE SRC_GPR(0) RESOURCE_ID(0) + 0x000d1000, // DST_GPR(0) SWZ: XYZW TEX_UNNORMALIZED + 0xb0800000, // SAMPLER_ID(0) SRC_SWZ XYZW + 0x00000000, // TEX_DWORD_PAD +}; + diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index cb549497f54..68112c49dc3 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -65,6 +65,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r600_emit.h" #include "radeon_bocs_wrapper.h" #include "radeon_queryobj.h" +#include "r600_blit.h" #include "r700_state.h" #include "r700_ioctl.h" @@ -240,6 +241,7 @@ static void r600_init_vtbl(radeonContextPtr radeon) radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms; radeon->vtbl.fallback = r600_fallback; radeon->vtbl.emit_query_finish = r600_emit_query_finish; + radeon->vtbl.blit = r600_blit; } static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen) @@ -378,7 +380,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _mesa_init_driver_functions(&functions); r700InitStateFuncs(&functions); - r600InitTextureFuncs(&functions); + r600InitTextureFuncs(&r600->radeon, &functions); r700InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); r700InitIoctlFuncs(&functions); diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index a1b4af715e2..72c8c869b70 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -148,6 +148,8 @@ struct r600_context { GLint nNumActiveAos; StreamDesc stream_desc[VERT_ATTRIB_MAX]; struct r700_index_buffer ind_buf; + struct radeon_bo *blit_bo; + GLboolean blit_bo_loaded; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) @@ -178,6 +180,8 @@ extern GLboolean r700SyncSurf(context_t *context, uint32_t write_domain, uint32_t sync_type); +extern void r700WaitForIdleClean(context_t *context); + extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); extern void r700InitDraw(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index f745fe3e8a6..71dfd7e0591 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -396,7 +396,7 @@ static struct gl_texture_object *r600NewTextureObject(GLcontext * ctx, return &t->base; } -void r600InitTextureFuncs(struct 
dd_function_table *functions) +void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. @@ -424,6 +424,11 @@ void r600InitTextureFuncs(struct dd_function_table *functions) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; driInitTextureFormats(); diff --git a/src/mesa/drivers/dri/r600/r600_tex.h b/src/mesa/drivers/dri/r600/r600_tex.h index fb0e1a023e1..c2141ef5e5c 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.h +++ b/src/mesa/drivers/dri/r600/r600_tex.h @@ -58,6 +58,6 @@ extern void r600SetTexOffset(__DRIcontext *pDRICtx, GLint texname, extern GLboolean r600ValidateBuffers(GLcontext * ctx); -extern void r600InitTextureFuncs(struct dd_function_table *functions); +extern void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions); #endif /* __r600_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 0ff16b4dddc..c01b2fbb146 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4469,7 +4469,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } pAsm->D2.dst2.SaturateMode = 1; - pAsm->S[0].src.rtype = pAsm->D.dst.rtype; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = pAsm->D.dst.reg; noswizzle_PVSSRC(&(pAsm->S[0].src)); noneg_PVSSRC(&(pAsm->S[0].src)); @@ -5090,15 +5090,15 @@ void add_return_inst(r700_AssemblerBase *pAsm) { if(GL_FALSE == add_cf_instruction(pAsm) ) { - return GL_FALSE; + return; } //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; @@ -5302,7 +5302,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) { - GLfloat fLiteral[2] = {0.1, 0.0}; + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ pAsm->D.dst.opcode = SQ_OP2_INST_MOV; pAsm->D.dst.op3 = 0; @@ -5353,7 +5353,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) GLboolean testFlag(r700_AssemblerBase *pAsm) { - GLfloat fLiteral[2] = {0.1, 0.0}; + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ //Test flag GLuint tmp = gethelpr(pAsm); @@ -6123,7 +6123,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm, R700ControlFlowGenericClause* prelude_cf_ptr = NULL; - /* copy srcs to presub inputs */ + /* copy srcs to presub inputs */ pAsm->alu_x_opcode = SQ_CF_INST_ALU; for(i=0; i<uNumValidSrc; i++) { diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 56baf5b0d91..0064d0814f3 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -619,6 +619,7 @@ GLboolean assemble_RCP(r700_AssemblerBase *pAsm); GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); +GLboolean assemble_CONT(r700_AssemblerBase *pAsm); GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode); GLboolean 
assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 3bc2d2ba02b..1a1a87c3cf9 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -453,13 +453,31 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * R600_OUT_BATCH((2 << id)); END_BATCH(); } + /* Set CMASK & TILE buffer to the offset of color buffer as + * we don't use those this shouldn't cause any issue and we + * then have a valid cmd stream + */ + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1); + R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All); + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + rrb->bo, + r700->render_target[id].CB_COLOR0_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1); + R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All); + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + rrb->bo, + r700->render_target[id].CB_COLOR0_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(18); + BEGIN_BATCH_NO_AUTOSTATE(12); R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All); - R600_OUT_BATCH_REGVAL(CB_COLOR0_TILE + (4 * id), r700->render_target[id].CB_COLOR0_TILE.u32All); - R600_OUT_BATCH_REGVAL(CB_COLOR0_FRAG + (4 * id), r700->render_target[id].CB_COLOR0_FRAG.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All); END_BATCH(); diff --git a/src/mesa/drivers/dri/r600/r700_render.c 
b/src/mesa/drivers/dri/r600/r700_render.c index eab27cbd842..3a6210c53ac 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -422,7 +422,7 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, } /* start 3d, idle, cb/db flush */ -#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 +#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 18 static GLuint r700PredictRenderSize(GLcontext* ctx, const struct _mesa_prim *prim, diff --git a/src/mesa/drivers/dri/r600/radeon_tex_copy.c b/src/mesa/drivers/dri/r600/radeon_tex_copy.c new file mode 120000 index 00000000000..dfa5ba34e65 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_tex_copy.c @@ -0,0 +1 @@ +../radeon/radeon_tex_copy.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
index 2b2f2c4aa7a..c776be0e605 100644
--- a/src/mesa/drivers/dri/radeon/Makefile
+++ b/src/mesa/drivers/dri/radeon/Makefile
@@ -26,7 +26,8 @@ RADEON_COMMON_SOURCES = \
 	radeon_mipmap_tree.c \
 	radeon_queryobj.c \
 	radeon_span.c \
-	radeon_texture.c
+	radeon_texture.c \
+	radeon_tex_copy.c
 
 DRIVER_SOURCES = \
 	radeon_context.c \
@@ -40,6 +41,7 @@ DRIVER_SOURCES = \
 	radeon_swtcl.c \
 	radeon_maos.c \
 	radeon_sanity.c \
+	radeon_blit.c \
 	$(RADEON_COMMON_SOURCES)
 
 C_SOURCES = \
diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.c b/src/mesa/drivers/dri/radeon/radeon_blit.c
new file mode 100644
index 00000000000..0df4fbb33c5
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_blit.c
@@ -0,0 +1,456 @@
+/*
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "radeon_context.h"
+#include "radeon_blit.h"
+
+/**
+ * Return CP_PACKET0(@a reg, @a count - 1), or CP_PACKET2 when @a count is 0.
+ * @a rscrn is not used.
+ * NOTE(review): not called directly in this file; presumably expanded from
+ * the OUT_BATCH_REG* macros - confirm in radeon_cmdbuf.h.
+ */
+static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
+                                  int reg, int count)
+{
+    if (count)
+        return CP_PACKET0(reg, count - 1);
+    return CP_PACKET2;
+}
+
+/**
+ * Common formats supported as both textures and render targets.
+ * Returns 1 for the color formats listed below and 0 for everything else,
+ * including any format that reports depth bits.
+ */
+static unsigned is_blit_supported(gl_format mesa_format)
+{
+    /* XXX others? BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_ARGB4444:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_A8:
+        break;
+    default:
+        return 0;
+    }
+
+    /* ??? */
+    if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0)
+        return 0;
+
+    return 1;
+}
+
+/**
+ * Emit the vertex format and setup-engine state used by the blit (8 dwords).
+ * RADEON_TCL_BYPASS is set only when the chip has no RADEON_CHIPSET_TCL flag.
+ */
+static inline void emit_vtx_state(struct r100_context *r100)
+{
+    BATCH_LOCALS(&r100->radeon);
+
+    BEGIN_BATCH(8);
+    if (r100->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+        OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, 0);
+    } else {
+        OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS);
+
+    }
+    OUT_BATCH_REGVAL(RADEON_SE_COORD_FMT, (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+                                           RADEON_TEX1_W_ROUTING_USE_W0));
+    OUT_BATCH_REGVAL(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | RADEON_SE_VTX_FMT_ST0);
+    OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD |
+                                      RADEON_BFACE_SOLID |
+                                      RADEON_FFACE_SOLID |
+                                      RADEON_VTX_PIX_CENTER_OGL |
+                                      RADEON_ROUND_MODE_ROUND |
+                                      RADEON_ROUND_PREC_4TH_PIX));
+    END_BATCH();
+}
+
+/**
+ * Emit texture unit 0 state for sampling the source image (18 dwords).
+ * @param[in] offset offset of the image in @a bo, must be a multiple of 32
+ * @param[in] width  image width, at most 2047
+ * @param[in] height image height, at most 2047
+ * @param[in] pitch  row pitch in texels; converted to bytes with the
+ *                   format's bytes-per-texel before it is programmed
+ */
+static inline void emit_tx_setup(struct r100_context *r100,
+                                 gl_format mesa_format,
+                                 struct radeon_bo *bo,
+                                 intptr_t offset,
+                                 unsigned width,
+                                 unsigned height,
+                                 unsigned pitch)
+{
+    uint32_t txformat = RADEON_TXFORMAT_NON_POWER2;
+    BATCH_LOCALS(&r100->radeon);
+
+    assert(width <= 2047);
+    assert(height <= 2047);
+    assert(offset % 32 == 0);
+
+    /* XXX others? BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+        txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+        break;
+    case MESA_FORMAT_XRGB8888:
+        txformat |= RADEON_TXFORMAT_ARGB8888;
+        break;
+    case MESA_FORMAT_RGB565:
+        txformat |= RADEON_TXFORMAT_RGB565;
+        break;
+    case MESA_FORMAT_ARGB4444:
+        txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+        break;
+    case MESA_FORMAT_ARGB1555:
+        txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+        break;
+    case MESA_FORMAT_A8:
+        txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+        break;
+    default:
+        break;
+    }
+
+    BEGIN_BATCH(18);
+    OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+    OUT_BATCH_REGVAL(RADEON_PP_TXCBLEND_0, (RADEON_COLOR_ARG_A_ZERO |
+                                            RADEON_COLOR_ARG_B_ZERO |
+                                            RADEON_COLOR_ARG_C_T0_COLOR |
+                                            RADEON_BLEND_CTL_ADD |
+                                            RADEON_CLAMP_TX));
+    OUT_BATCH_REGVAL(RADEON_PP_TXABLEND_0, (RADEON_ALPHA_ARG_A_ZERO |
+                                            RADEON_ALPHA_ARG_B_ZERO |
+                                            RADEON_ALPHA_ARG_C_T0_ALPHA |
+                                            RADEON_BLEND_CTL_ADD |
+                                            RADEON_CLAMP_TX));
+    OUT_BATCH_REGVAL(RADEON_PP_TXFILTER_0, (RADEON_CLAMP_S_CLAMP_LAST |
+                                            RADEON_CLAMP_T_CLAMP_LAST |
+                                            RADEON_MAG_FILTER_NEAREST |
+                                            RADEON_MIN_FILTER_NEAREST));
+    OUT_BATCH_REGVAL(RADEON_PP_TXFORMAT_0, txformat);
+    OUT_BATCH_REGVAL(RADEON_PP_TEX_SIZE_0, ((width - 1) |
+                                            ((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
+    OUT_BATCH_REGVAL(RADEON_PP_TEX_PITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32);
+
+    /* The register value is the image offset inside the buffer object.
+     * NOTE(review): assumes OUT_BATCH_RELOC emits its offset argument as the
+     * register dword, like the COLORPITCH write in emit_cb_setup(). */
+    OUT_BATCH_REGSEQ(RADEON_PP_TXOFFSET_0, 1);
+    OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+
+    END_BATCH();
+}
+
+/**
+ * Emit colorbuffer state for rendering into the destination image
+ * (18 dwords): RE_TOP_LEFT 0, RE_WIDTH_HEIGHT from @a width and @a height,
+ * full plane mask, ONE/ZERO blend factors and the color format.
+ * @param[in] offset offset of the image in @a bo
+ * @param[in] pitch  value programmed into RADEON_RB3D_COLORPITCH
+ */
+static inline void emit_cb_setup(struct r100_context *r100,
+                                 struct radeon_bo *bo,
+                                 intptr_t offset,
+                                 gl_format mesa_format,
+                                 unsigned pitch,
+                                 unsigned width,
+                                 unsigned height)
+{
+    uint32_t dst_pitch = pitch;
+    uint32_t dst_format = 0;
+    BATCH_LOCALS(&r100->radeon);
+
+    /* XXX others? BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+        dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+        break;
+    case MESA_FORMAT_RGB565:
+        dst_format = RADEON_COLOR_FORMAT_RGB565;
+        break;
+    case MESA_FORMAT_ARGB4444:
+        dst_format = RADEON_COLOR_FORMAT_ARGB4444;
+        break;
+    case MESA_FORMAT_ARGB1555:
+        dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+        break;
+    case MESA_FORMAT_A8:
+        dst_format = RADEON_COLOR_FORMAT_RGB8;
+        break;
+    default:
+        break;
+    }
+
+    BEGIN_BATCH_NO_AUTOSTATE(18);
+    OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0);
+    OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) |
+                                              (height << RADEON_RE_HEIGHT_SHIFT)));
+    OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
+    OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+    OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format);
+
+    /* Render at the image's offset inside the buffer object. */
+    OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1);
+    OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+    OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1);
+    OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+
+    END_BATCH();
+}
+
+/**
+ * Add @a src_bo and @a dst_bo to the command stream as persistent buffer
+ * objects and run the space check together with the first reserved DMA
+ * buffer object.
+ * @return GL_TRUE if radeon_cs_space_check_with_bo() returned 0, else GL_FALSE
+ */
+static GLboolean validate_buffers(struct r100_context *r100,
+                                  struct radeon_bo *src_bo,
+                                  struct radeon_bo *dst_bo)
+{
+    int ret;
+    radeon_cs_space_add_persistent_bo(r100->radeon.cmdbuf.cs,
+                                      src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+
+    radeon_cs_space_add_persistent_bo(r100->radeon.cmdbuf.cs,
+                                      dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+
+    ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs,
+                                        first_elem(&r100->radeon.dma.reserved)->bo,
+                                        RADEON_GEM_DOMAIN_GTT, 0);
+    if (ret)
+        return GL_FALSE;
+
+    return GL_TRUE;
+}
+
+/**
+ * Calculate texcoords for given image region.
+ * Output values are [minx, maxx, miny, maxy]
+ * When @a flip_y is set both y values are replaced by 1 - y.
+ */
+static inline void calc_tex_coords(float img_width, float img_height,
+                                   float x, float y,
+                                   float reg_width, float reg_height,
+                                   unsigned flip_y, float *buf)
+{
+    buf[0] = x / img_width;
+    buf[1] = buf[0] + reg_width / img_width;
+    buf[2] = y / img_height;
+    buf[3] = buf[2] + reg_height / img_height;
+    if (flip_y)
+    {
+        buf[2] = 1.0 - buf[2];
+        buf[3] = 1.0 - buf[3];
+    }
+}
+
+/**
+ * Emit an immediate-mode RECT_LIST draw of three vertices (15 dwords).
+ * Each vertex is [x, y, s, t]: destination position plus the source
+ * texcoords from calc_tex_coords().
+ */
+static inline void emit_draw_packet(struct r100_context *r100,
+                                    unsigned src_width, unsigned src_height,
+                                    unsigned src_x_offset, unsigned src_y_offset,
+                                    unsigned dst_x_offset, unsigned dst_y_offset,
+                                    unsigned reg_width, unsigned reg_height,
+                                    unsigned flip_y)
+{
+    float texcoords[4];
+    float verts[12];
+    BATCH_LOCALS(&r100->radeon);
+
+    calc_tex_coords(src_width, src_height,
+                    src_x_offset, src_y_offset,
+                    reg_width, reg_height,
+                    flip_y, texcoords);
+
+    verts[0] = dst_x_offset;
+    verts[1] = dst_y_offset + reg_height;
+    verts[2] = texcoords[0];
+    verts[3] = texcoords[3];
+
+    verts[4] = dst_x_offset + reg_width;
+    verts[5] = dst_y_offset + reg_height;
+    verts[6] = texcoords[1];
+    verts[7] = texcoords[3];
+
+    verts[8] = dst_x_offset + reg_width;
+    verts[9] = dst_y_offset;
+    verts[10] = texcoords[1];
+    verts[11] = texcoords[2];
+
+    BEGIN_BATCH(15);
+    OUT_BATCH(RADEON_CP_PACKET3_3D_DRAW_IMMD | (13 << 16));
+    OUT_BATCH(RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_ST0);
+    OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+              RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+              RADEON_CP_VC_CNTL_MAOS_ENABLE |
+              RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+              (3 << 16));
+    OUT_BATCH_TABLE(verts, 12);
+    END_BATCH();
+}
+
+/**
+ * Copy a region of [@a reg_width x @a reg_height] pixels from source buffer
+ * to destination buffer.
+ * @param[in] ctx GL context
+ * @param[in] src_bo source radeon buffer object
+ * @param[in] src_offset offset of the source image in the @a src_bo
+ * @param[in] src_mesaformat source image format
+ * @param[in] src_pitch aligned source image width
+ * @param[in] src_width source image width
+ * @param[in] src_height source image height
+ * @param[in] src_x_offset x offset in the source image
+ * @param[in] src_y_offset y offset in the source image
+ * @param[in] dst_bo destination radeon buffer object
+ * @param[in] dst_offset offset of the destination image in the @a dst_bo
+ * @param[in] dst_mesaformat destination image format
+ * @param[in] dst_pitch aligned destination image width
+ * @param[in] dst_width destination image width
+ * @param[in] dst_height destination image height
+ * @param[in] dst_x_offset x offset in the destination image
+ * @param[in] dst_y_offset y offset in the destination image
+ * @param[in] reg_width region width
+ * @param[in] reg_height region height
+ * @param[in] flip_y set if y coords of the source image need to be flipped
+ * @return GL_TRUE if the blit was emitted, GL_FALSE if it was rejected
+ */
+unsigned r100_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned reg_width,
+                   unsigned reg_height,
+                   unsigned flip_y)
+{
+    struct r100_context *r100 = R100_CONTEXT(ctx);
+
+    if (!is_blit_supported(dst_mesaformat))
+        return GL_FALSE;
+
+    /* Make sure that colorbuffer has even width - hw limitation */
+    if (dst_pitch % 2 > 0)
+        ++dst_pitch;
+
+    /* Rendering to small buffer doesn't work.
+     * Looks like a hw limitation.
+     */
+    if (dst_pitch < 32)
+        return GL_FALSE;
+
+    /* Reject regions that start outside of either image; the unsigned
+     * subtractions in the clamping below would wrap around otherwise.
+     */
+    if (src_x_offset > src_width || src_y_offset > src_height ||
+        dst_x_offset > dst_width || dst_y_offset > dst_height)
+        return GL_FALSE;
+
+    /* Need to clamp the region size to make sure
+     * we don't read outside of the source buffer
+     * or write outside of the destination buffer.
+     */
+    if (reg_width + src_x_offset > src_width)
+        reg_width = src_width - src_x_offset;
+    if (reg_height + src_y_offset > src_height)
+        reg_height = src_height - src_y_offset;
+    if (reg_width + dst_x_offset > dst_width)
+        reg_width = dst_width - dst_x_offset;
+    if (reg_height + dst_y_offset > dst_height)
+        reg_height = dst_height - dst_y_offset;
+
+    if (src_bo == dst_bo) {
+        return GL_FALSE;
+    }
+
+    if (src_offset % 32 || dst_offset % 32) {
+        return GL_FALSE;
+    }
+
+    if (0) {
+        fprintf(stderr, "src: size [%d x %d], pitch %d, "
+                "offset [%d x %d], format %s, bo %p\n",
+                src_width, src_height, src_pitch,
+                src_x_offset, src_y_offset,
+                _mesa_get_format_name(src_mesaformat),
+                src_bo);
+        fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
+                dst_pitch, dst_x_offset, dst_y_offset,
+                _mesa_get_format_name(dst_mesaformat), dst_bo);
+        fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
+    }
+
+    /* Flush is needed to make sure that source buffer has correct data */
+    radeonFlush(ctx);
+
+    rcommonEnsureCmdBufSpace(&r100->radeon, 59, __FUNCTION__);
+
+    if (!validate_buffers(r100, src_bo, dst_bo))
+        return GL_FALSE;
+
+    /* 8 */
+    emit_vtx_state(r100);
+    /* 18 */
+    emit_tx_setup(r100, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
+    /* 18 */
+    emit_cb_setup(r100, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
+    /* 15 */
+    emit_draw_packet(r100, src_width, src_height,
+                     src_x_offset, src_y_offset,
+                     dst_x_offset, dst_y_offset,
+                     reg_width, reg_height,
+                     flip_y);
+
+    radeonFlush(ctx);
+
+    return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.h b/src/mesa/drivers/dri/radeon/radeon_blit.h
new file mode 100644
index 00000000000..d36366ff791
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_blit.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_BLIT_H
+#define RADEON_BLIT_H
+
+/* NOTE(review): no definition of r100_blit_init() is visible in
+ * radeon_blit.c - confirm it is still needed.
+ */
+void r100_blit_init(struct r100_context *r100);
+
+/**
+ * Copy a reg_width x reg_height pixel region from the source image to the
+ * destination image; the parameters are described at the definition in
+ * radeon_blit.c.  Returns GL_TRUE or GL_FALSE.
+ */
+unsigned r100_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned reg_width,
+                   unsigned reg_height,
+                   unsigned flip_y);
+
+#endif /* RADEON_BLIT_H */
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index ab79d2dc0f2..e397ee8c226 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -518,6 +518,26 @@ struct radeon_context {
 	   void (*free_context)(GLcontext *ctx);
 	   void (*emit_query_finish)(radeonContextPtr radeon);
 	   void (*update_scissor)(GLcontext *ctx);
+	   unsigned (*blit)(GLcontext *ctx,
+                        struct radeon_bo *src_bo,
+                        intptr_t src_offset,
+                        gl_format src_mesaformat,
+                        unsigned src_pitch,
+                        unsigned src_width,
+                        unsigned src_height,
+                        unsigned src_x_offset,
+                        unsigned src_y_offset,
+                        struct radeon_bo *dst_bo,
+                        intptr_t dst_offset,
+                        gl_format dst_mesaformat,
+                        unsigned dst_pitch,
+                        unsigned dst_width,
+                        unsigned dst_height,
+                        unsigned dst_x_offset,
+                        unsigned dst_y_offset,
+                        unsigned reg_width,
+                        unsigned reg_height,
+                        unsigned flip_y);
 	} vtbl;
 };
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 3cd305b0a25..6c08a90bbd6 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -63,6 +63,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_tcl.h" #include "radeon_maos.h" #include "radeon_queryobj.h" +#include "radeon_blit.h" #define need_GL_ARB_occlusion_query #define need_GL_EXT_blend_minmax @@ -202,6 +203,7 @@ static void r100_init_vtbl(radeonContextPtr radeon) radeon->vtbl.fallback = radeonFallback; radeon->vtbl.free_context = r100_vtbl_free_context; radeon->vtbl.emit_query_finish = r100_emit_query_finish; + radeon->vtbl.blit = r100_blit; } /* Create the device specific context. @@ -228,6 +230,7 @@ r100CreateContext( const __GLcontextModes *glVisual, if ( !rmesa ) return GL_FALSE; + rmesa->radeon.radeonScreen = screen; r100_init_vtbl(&rmesa->radeon); /* init exp fog table data */ @@ -257,7 +260,7 @@ r100CreateContext( const __GLcontextModes *glVisual, * (the texture functions are especially important) */ _mesa_init_driver_functions( &functions ); - radeonInitTextureFuncs( &functions ); + radeonInitTextureFuncs( &rmesa->radeon, &functions ); radeonInitQueryObjFunctions(&functions); if (!radeonInitContext(&rmesa->radeon, &functions, diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index dfedc38bfd1..d84760bf74f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -453,7 +453,6 @@ struct r100_context { extern GLboolean r100CreateContext( const __GLcontextModes *glVisual, __DRIcontext *driContextPriv, void *sharedContextPrivate); - #endif /* __RADEON_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 14163f13af4..882ee5c194b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -434,7 +434,7 @@ radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) -void radeonInitTextureFuncs( struct dd_function_table *functions ) +void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ) { functions->ChooseTextureFormat = 
radeonChooseTextureFormat_mesa; functions->TexImage1D = radeonTexImage1D; @@ -455,6 +455,11 @@ void radeonInitTextureFuncs( struct dd_function_table *functions ) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; functions->NewTextureImage = radeonNewTextureImage; diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h index a4aaddc74fa..0113ffd3dac 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.h +++ b/src/mesa/drivers/dri/radeon/radeon_tex.h @@ -52,6 +52,6 @@ extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t, extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t ); -extern void radeonInitTextureFuncs( struct dd_function_table *functions ); +extern void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ); #endif /* __RADEON_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_texcopy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index ebc9c05b8a8..44e144c80f5 100644 --- a/src/mesa/drivers/dri/r300/r300_texcopy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -26,7 +26,7 @@ */ #include "radeon_common.h" -#include "r300_context.h" +#include "radeon_texture.h" #include "main/image.h" #include "main/teximage.h" @@ -34,11 +34,8 @@ #include "drivers/common/meta.h" #include "radeon_mipmap_tree.h" -#include "r300_blit.h" #include <main/debug.h> -// TODO: -// need to pass correct pitch for small dst textures! 
static GLboolean do_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level, @@ -48,13 +45,13 @@ do_copy_texsubimage(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height) { - struct r300_context *r300 = R300_CONTEXT(ctx); + radeonContextPtr radeon = RADEON_CONTEXT(ctx); struct radeon_renderbuffer *rrb; if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) { - rrb = radeon_get_depthbuffer(&r300->radeon); + rrb = radeon_get_depthbuffer(radeon); } else { - rrb = radeon_get_colorbuffer(&r300->radeon); + rrb = radeon_get_colorbuffer(radeon); } if (!timg->mt) { @@ -69,10 +66,6 @@ do_copy_texsubimage(GLcontext *ctx, intptr_t src_offset = rrb->draw_offset; intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level); - if (src_offset % 32 || dst_offset % 32) { - return GL_FALSE; - } - if (0) { fprintf(stderr, "%s: copying to face %d, level %d\n", __FUNCTION__, _mesa_tex_target_to_face(target), level); @@ -84,18 +77,19 @@ do_copy_texsubimage(GLcontext *ctx, } /* blit from src buffer to texture */ - return r300_blit(r300, rrb->bo, src_offset, rrb->base.Format, rrb->pitch/rrb->cpp, - rrb->base.Width, rrb->base.Height, x, y, - timg->mt->bo, dst_offset, timg->base.TexFormat, - timg->base.Width, timg->base.Width, timg->base.Height, - dstx, dsty, width, height, 1); + return radeon->vtbl.blit(ctx, rrb->bo, src_offset, rrb->base.Format, rrb->pitch/rrb->cpp, + rrb->base.Width, rrb->base.Height, x, y, + timg->mt->bo, dst_offset, timg->base.TexFormat, + timg->mt->levels[level].rowstride / _mesa_get_format_bytes(timg->base.TexFormat), + timg->base.Width, timg->base.Height, + dstx, dsty, width, height, 1); } -static void -r300CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) +void +radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, 
+ GLint border) { struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); struct gl_texture_object *texObj = @@ -139,11 +133,11 @@ fail: width, height, border); } -static void -r300CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLint x, GLint y, - GLsizei width, GLsizei height) +void +radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, GLsizei height) { struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); @@ -159,10 +153,3 @@ r300CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, xoffset, yoffset, x, y, width, height); } } - - -void r300_init_texcopy_functions(struct dd_function_table *table) -{ - table->CopyTexImage2D = r300CopyTexImage2D; - table->CopyTexSubImage2D = r300CopyTexSubImage2D; -}
\ No newline at end of file diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 03178116c1a..20a27ad9a70 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -197,21 +197,6 @@ void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj) radeon_bo_unmap(t->mt->bo); } -GLuint radeon_face_for_target(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - default: - return 0; - } -} - /** * Wraps Mesa's implementation to ensure that the base level image is mapped. * @@ -248,7 +233,7 @@ static void radeon_generate_mipmap(GLcontext *ctx, GLenum target, void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj) { - GLuint face = radeon_face_for_target(target); + GLuint face = _mesa_tex_target_to_face(target); radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]); radeon_teximage_map(baseimage, GL_FALSE); @@ -710,7 +695,7 @@ static void radeon_teximage( radeon_texture_image* image = get_radeon_texture_image(texImage); GLint postConvWidth = width; GLint postConvHeight = height; - GLuint face = radeon_face_for_target(target); + GLuint face = _mesa_tex_target_to_face(target); { struct radeon_bo *bo; @@ -863,7 +848,7 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve if (RADEON_DEBUG & RADEON_TEXTURE) { fprintf(stderr, "radeon_texsubimage%dd: texObj %p, texImage %p, face %d, level %d\n", - dims, texObj, texImage, radeon_face_for_target(target), level); + dims, texObj, texImage, _mesa_tex_target_to_face(target), level); } t->validated = GL_FALSE; diff --git 
a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index 906daf12d0a..f09dd652142 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -44,7 +44,6 @@ void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj); void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj); void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj); int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj); -GLuint radeon_face_for_target(GLenum target); gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx, GLint internalFormat, @@ -126,4 +125,14 @@ void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage); +void radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border); + +void radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, GLsizei height); + #endif diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h index e81d7fdcd0e..1b33de1edf7 100644 --- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h +++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h @@ -1959,7 +1959,30 @@ #define RADEON_SE_ZBIAS_FACTOR 0x1db0 #define RADEON_SE_ZBIAS_CONSTANT 0x1db4 - +#define RADEON_SE_VTX_FMT 0x2080 +# define RADEON_SE_VTX_FMT_XY 0x00000000 +# define RADEON_SE_VTX_FMT_W0 0x00000001 +# define RADEON_SE_VTX_FMT_FPCOLOR 0x00000002 +# define RADEON_SE_VTX_FMT_FPALPHA 0x00000004 +# define RADEON_SE_VTX_FMT_PKCOLOR 0x00000008 +# define RADEON_SE_VTX_FMT_FPSPEC 0x00000010 +# define RADEON_SE_VTX_FMT_FPFOG 0x00000020 +# define RADEON_SE_VTX_FMT_PKSPEC 0x00000040 +# define RADEON_SE_VTX_FMT_ST0 
0x00000080 +# define RADEON_SE_VTX_FMT_ST1 0x00000100 +# define RADEON_SE_VTX_FMT_Q1 0x00000200 +# define RADEON_SE_VTX_FMT_ST2 0x00000400 +# define RADEON_SE_VTX_FMT_Q2 0x00000800 +# define RADEON_SE_VTX_FMT_ST3 0x00001000 +# define RADEON_SE_VTX_FMT_Q3 0x00002000 +# define RADEON_SE_VTX_FMT_Q0 0x00004000 +# define RADEON_SE_VTX_FMT_BLND_WEIGHT_CNT_MASK 0x00038000 +# define RADEON_SE_VTX_FMT_N0 0x00040000 +# define RADEON_SE_VTX_FMT_XY1 0x08000000 +# define RADEON_SE_VTX_FMT_Z1 0x10000000 +# define RADEON_SE_VTX_FMT_W1 0x20000000 +# define RADEON_SE_VTX_FMT_N1 0x40000000 +# define RADEON_SE_VTX_FMT_Z 0x80000000 /* Registers for CP and Microcode Engine */ #define RADEON_CP_ME_RAM_ADDR 0x07d4 diff --git a/src/mesa/glapi/gl_XML.py b/src/mesa/glapi/gl_XML.py index bafb00306f8..a10a35e513c 100644 --- a/src/mesa/glapi/gl_XML.py +++ b/src/mesa/glapi/gl_XML.py @@ -184,7 +184,7 @@ class gl_print_base: The name is also added to the file's undef_list. """ self.undef_list.append("PURE") - print """# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) + print """# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PURE __attribute__((pure)) # else # define PURE @@ -224,7 +224,7 @@ class gl_print_base: """ self.undef_list.append(S) - print """# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__) + print """# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__) # define %s __attribute__((visibility("%s"))) # else # define %s @@ -244,7 +244,7 @@ class gl_print_base: """ self.undef_list.append("NOINLINE") - print """# if defined(__GNUC__) + print """# if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define NOINLINE __attribute__((noinline)) # else # define NOINLINE diff --git a/src/mesa/glapi/glapitemp.h b/src/mesa/glapi/glapitemp.h index 6767a07673d..b8bfcc1a160 100644 --- 
a/src/mesa/glapi/glapitemp.h +++ b/src/mesa/glapi/glapitemp.h @@ -27,7 +27,7 @@ */ -# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__) +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__) # define HIDDEN __attribute__((visibility("hidden"))) # else # define HIDDEN diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 4eb249b4af1..9eab1ead246 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -173,7 +173,8 @@ extern "C" { * We also need to define a USED attribute, so the optimizer doesn't * inline a static function that we later use in an alias. - ajax */ -#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +#if (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \ + || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PUBLIC __attribute__((visibility("default"))) # define USED __attribute__((used)) #else diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 7b3599f9322..4da245ab495 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -861,6 +861,9 @@ _mesa_GenRenderbuffersEXT(GLsizei n, GLuint *renderbuffers) * * \return one of GL_RGB, GL_RGBA, GL_STENCIL_INDEX, GL_DEPTH_COMPONENT * GL_DEPTH_STENCIL_EXT or zero if error. + * + * XXX in the future when we support red-only and red-green formats + * we'll also return GL_RED and GL_RG. 
*/ GLenum _mesa_base_fbo_format(GLcontext *ctx, GLenum internalFormat) diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 77153889b68..d70cf877e84 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -57,7 +57,7 @@ void st_upload_constants( struct st_context *st, unsigned shader_type) { struct pipe_context *pipe = st->pipe; - struct pipe_constant_buffer *cbuf = &st->state.constants[shader_type]; + struct pipe_buffer **cbuf = &st->state.constants[shader_type]; assert(shader_type == PIPE_SHADER_VERTEX || shader_type == PIPE_SHADER_FRAGMENT); @@ -71,8 +71,8 @@ void st_upload_constants( struct st_context *st, /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ - pipe_buffer_reference(&cbuf->buffer, NULL ); - cbuf->buffer = pipe_buffer_create(pipe->screen, 16, + pipe_buffer_reference(cbuf, NULL ); + *cbuf = pipe_buffer_create(pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, paramBytes ); @@ -84,12 +84,12 @@ void st_upload_constants( struct st_context *st, } /* load Mesa constants into the constant buffer */ - if (cbuf->buffer) - st_no_flush_pipe_buffer_write(st, cbuf->buffer, + if (cbuf) + st_no_flush_pipe_buffer_write(st, *cbuf, 0, paramBytes, params->ParameterValues); - st->pipe->set_constant_buffer(st->pipe, shader_type, 0, cbuf); + st->pipe->set_constant_buffer(st->pipe, shader_type, 0, *cbuf); } else { st->constants.tracked_state[shader_type].dirty.mesa = 0x0; diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 46c8cbb3098..176f3ea68d7 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -35,8 +35,6 @@ * Brian Paul */ - - #include "main/imports.h" #include "main/mtypes.h" #include "main/macros.h" @@ -57,9 +55,7 @@ - - -/* +/** * Translate fragment program if needed. 
*/ static void @@ -155,8 +151,10 @@ find_translated_vp(struct st_context *st, } - - +/** + * Return pointer to a pass-through fragment shader. + * This shader is used when a texture is missing/incomplete. + */ static void * get_passthrough_fs(struct st_context *st) { @@ -168,6 +166,11 @@ get_passthrough_fs(struct st_context *st) return st->passthrough_fs; } + +/** + * Update fragment program state/atom. This involves translating the + * Mesa fragment program into a gallium fragment program and binding it. + */ static void update_fp( struct st_context *st ) { @@ -191,6 +194,7 @@ update_fp( struct st_context *st ) } } + const struct st_tracked_state st_update_fp = { "st_update_fp", /* name */ { /* dirty */ @@ -202,7 +206,10 @@ const struct st_tracked_state st_update_fp = { - +/** + * Update vertex program state/atom. This involves translating the + * Mesa vertex program into a gallium vertex program and binding it. + */ static void update_vp( struct st_context *st ) { diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 145bd62b83e..9e6ce30db04 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -218,8 +218,8 @@ static void st_destroy_context_priv( struct st_context *st ) } for (i = 0; i < Elements(st->state.constants); i++) { - if (st->state.constants[i].buffer) { - pipe_buffer_reference(&st->state.constants[i].buffer, NULL); + if (st->state.constants[i]) { + pipe_buffer_reference(&st->state.constants[i], NULL); } } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 831909a3f8f..2c4943cfb0a 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -92,7 +92,7 @@ struct st_context struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[2]; + struct pipe_buffer *constants[2]; struct 
pipe_framebuffer_state framebuffer; struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS]; struct pipe_scissor_state scissor; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index e54f21be600..b0d5b993a77 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -365,6 +365,7 @@ setup_interleaved_attribs(GLcontext *ctx, velements[attr].src_offset = (unsigned) (arrays[mesaAttr]->Ptr - offset0); + velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = 0; velements[attr].nr_components = arrays[mesaAttr]->Size; velements[attr].src_format = @@ -454,6 +455,7 @@ setup_non_interleaved_attribs(GLcontext *ctx, /* common-case setup */ vbuffer[attr].stride = stride; /* in bytes */ vbuffer[attr].max_index = max_index; + velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = attr; velements[attr].nr_components = arrays[mesaAttr]->Size; velements[attr].src_format @@ -522,7 +524,6 @@ st_draw_vbo(GLcontext *ctx, struct pipe_context *pipe = ctx->st->pipe; const struct st_vertex_program *vp; const struct st_vp_varient *vpv; - const struct pipe_shader_state *vs; struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; GLuint attr; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; @@ -550,7 +551,6 @@ st_draw_vbo(GLcontext *ctx, /* must get these after state validation! 
*/ vp = ctx->st->vp; vpv = ctx->st->vp_varient; - vs = &vpv->state; #if 0 if (MESA_VERBOSE & VERBOSE_GLSL) { diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index cfc0caac983..a05d6dd06bd 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -177,6 +177,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* common-case setup */ vbuffers[attr].stride = arrays[mesaAttr]->StrideB; /* in bytes */ vbuffers[attr].max_index = max_index; + velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = attr; velements[attr].nr_components = arrays[mesaAttr]->Size; velements[attr].src_format = @@ -239,11 +240,11 @@ st_feedback_draw_vbo(GLcontext *ctx, /* map constant buffers */ mapped_constants = pipe_buffer_map(pipe->screen, - st->state.constants[PIPE_SHADER_VERTEX].buffer, + st->state.constants[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_constant_buffer(st->draw, PIPE_SHADER_VERTEX, mapped_constants, - st->state.constants[PIPE_SHADER_VERTEX].buffer->size); + st->state.constants[PIPE_SHADER_VERTEX]->size); /* draw here */ @@ -253,7 +254,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* unmap constant buffers */ - pipe_buffer_unmap(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX].buffer); + pipe_buffer_unmap(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX]); /* * unmap vertex/index buffers diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index e25a613d8a9..2a5fb27d8f3 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -168,6 +168,7 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_blend_subtract = GL_TRUE; ctx->Extensions.EXT_framebuffer_blit = GL_TRUE; ctx->Extensions.EXT_framebuffer_object = GL_TRUE; + ctx->Extensions.EXT_framebuffer_multisample = GL_TRUE; ctx->Extensions.EXT_fog_coord = GL_TRUE; 
ctx->Extensions.EXT_multi_draw_arrays = GL_TRUE; ctx->Extensions.EXT_pixel_buffer_object = GL_TRUE; diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index e788008dfe1..05b56c9b584 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -48,6 +48,10 @@ struct label { unsigned token; }; + +/** + * Intermediate state used during shader translation. + */ struct st_translate { struct ureg_program *ureg; @@ -730,6 +734,7 @@ emit_face_var( struct st_translate *t, t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); } + static void emit_edgeflags( struct st_translate *t, const struct gl_program *program ) @@ -741,6 +746,7 @@ emit_edgeflags( struct st_translate *t, ureg_MOV( ureg, edge_dst, edge_src ); } + /** * Translate Mesa program to TGSI format. * \param program the program to translate @@ -758,7 +764,7 @@ emit_edgeflags( struct st_translate *t, * \param outputSemanticIndex the semantic index (ex: which texcoord) for * each output * - * \return array of translated tokens, caller's responsibility to free + * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY */ enum pipe_error st_translate_mesa_program( @@ -779,6 +785,7 @@ st_translate_mesa_program( { struct st_translate translate, *t; unsigned i; + enum pipe_error ret = PIPE_OK; t = &translate; memset(t, 0, sizeof *t); @@ -865,8 +872,10 @@ st_translate_mesa_program( t->constants = CALLOC( program->Parameters->NumParameters, sizeof t->constants[0] ); - if (t->constants == NULL) + if (t->constants == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; + } for (i = 0; i < program->Parameters->NumParameters; i++) { switch (program->Parameters->Parameters[i].Type) { @@ -920,8 +929,6 @@ st_translate_mesa_program( t->insn[t->labels[i].branch_target] ); } - return PIPE_OK; - out: FREE(t->insn); FREE(t->labels); @@ -931,7 +938,7 @@ out: debug_printf("%s: translate error flag set\n", __FUNCTION__); } - return 
PIPE_ERROR_OUT_OF_MEMORY; + return ret; } diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 6a869fae904..5c87e47ca3d 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -44,7 +44,6 @@ #include "st_debug.h" #include "st_context.h" -#include "st_atom.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "cso_cache/cso_context.h" diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 8a3e4cd3ac3..b210ac91873 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -35,7 +35,6 @@ #include "main/texfetch.h" #include "main/teximage.h" #include "main/texobj.h" -#include "main/texstore.h" #undef Elements /* fix re-defined macro warning */ diff --git a/src/mesa/swrast/s_accum.c b/src/mesa/swrast/s_accum.c index 0e0876efcba..cf53f01b7c1 100644 --- a/src/mesa/swrast/s_accum.c +++ b/src/mesa/swrast/s_accum.c @@ -27,7 +27,6 @@ #include "main/context.h" #include "main/macros.h" #include "main/imports.h" -#include "main/fbobject.h" #include "s_accum.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c index e88ff191239..353e9999d61 100644 --- a/src/mesa/swrast/s_atifragshader.c +++ b/src/mesa/swrast/s_atifragshader.c @@ -23,7 +23,6 @@ #include "main/colormac.h" #include "main/context.h" #include "main/macros.h" -#include "shader/program.h" #include "shader/atifragshader.h" #include "swrast/s_atifragshader.h" diff --git a/src/mesa/swrast/s_bitmap.c b/src/mesa/swrast/s_bitmap.c index 46c63aa6458..59e26e9ea31 100644 --- a/src/mesa/swrast/s_bitmap.c +++ b/src/mesa/swrast/s_bitmap.c @@ -33,7 +33,6 @@ #include "main/condrender.h" #include "main/image.h" #include "main/macros.h" -#include "main/pixel.h" #include "s_context.h" #include "s_span.h" diff --git a/src/mesa/swrast/s_copypix.c b/src/mesa/swrast/s_copypix.c index 986b6aff4f0..e881d1be30c 100644 --- 
a/src/mesa/swrast/s_copypix.c +++ b/src/mesa/swrast/s_copypix.c @@ -28,11 +28,9 @@ #include "main/colormac.h" #include "main/condrender.h" #include "main/convolve.h" -#include "main/histogram.h" #include "main/image.h" #include "main/macros.h" #include "main/imports.h" -#include "main/pixel.h" #include "s_context.h" #include "s_depth.h" diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c index c37a54eb3eb..0b6bb7e3ec9 100644 --- a/src/mesa/swrast/s_depth.c +++ b/src/mesa/swrast/s_depth.c @@ -28,7 +28,6 @@ #include "main/formats.h" #include "main/macros.h" #include "main/imports.h" -#include "main/fbobject.h" #include "s_depth.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c index 55a4c4c3c61..248d6cc1c04 100644 --- a/src/mesa/swrast/s_drawpix.c +++ b/src/mesa/swrast/s_drawpix.c @@ -31,7 +31,6 @@ #include "main/image.h" #include "main/macros.h" #include "main/imports.h" -#include "main/pixel.h" #include "main/state.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_feedback.c b/src/mesa/swrast/s_feedback.c index 47ed25ee100..2e6066983d5 100644 --- a/src/mesa/swrast/s_feedback.c +++ b/src/mesa/swrast/s_feedback.c @@ -25,7 +25,6 @@ #include "main/glheader.h" #include "main/colormac.h" #include "main/context.h" -#include "main/enums.h" #include "main/feedback.h" #include "main/macros.h" diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index a22d34415d5..9ac33a26a65 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -25,7 +25,6 @@ #include "main/glheader.h" #include "main/colormac.h" #include "main/context.h" -#include "main/texstate.h" #include "shader/prog_instruction.h" #include "s_fragprog.h" diff --git a/src/mesa/swrast/s_lines.c b/src/mesa/swrast/s_lines.c index 23cb9b57eff..5411229d700 100644 --- a/src/mesa/swrast/s_lines.c +++ b/src/mesa/swrast/s_lines.c @@ -29,7 +29,6 @@ #include "main/macros.h" #include "s_aaline.h" #include "s_context.h" 
-#include "s_depth.h" #include "s_feedback.h" #include "s_lines.h" #include "s_span.h" diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c index 50ec2063a55..6b955429e94 100644 --- a/src/mesa/swrast/s_points.c +++ b/src/mesa/swrast/s_points.c @@ -27,7 +27,6 @@ #include "main/colormac.h" #include "main/context.h" #include "main/macros.h" -#include "main/texstate.h" #include "s_context.h" #include "s_feedback.h" #include "s_points.h" diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c index 44a11cd6dd2..94fb974eab0 100644 --- a/src/mesa/swrast/s_readpix.c +++ b/src/mesa/swrast/s_readpix.c @@ -33,7 +33,6 @@ #include "main/image.h" #include "main/macros.h" #include "main/imports.h" -#include "main/pixel.h" #include "main/state.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c index 889164b9861..594b71a03c6 100644 --- a/src/mesa/swrast/s_texcombine.c +++ b/src/mesa/swrast/s_texcombine.c @@ -29,7 +29,6 @@ #include "main/colormac.h" #include "main/image.h" #include "main/imports.h" -#include "main/pixel.h" #include "shader/prog_instruction.h" #include "s_context.h" diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index db21b4589de..5a14e595a04 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -38,7 +38,6 @@ #include "tnl.h" #include "t_context.h" #include "t_pipeline.h" -#include "t_vp_build.h" #include "vbo/vbo.h" diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index d31b29b9b40..38757a0e288 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -29,15 +29,11 @@ #include "main/condrender.h" #include "main/context.h" #include "main/imports.h" -#include "main/state.h" #include "main/mtypes.h" #include "main/macros.h" #include "main/enums.h" #include "t_context.h" -#include "t_pipeline.h" -#include "t_vp_build.h" -#include "t_vertex.h" #include "tnl.h" diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c index 
01b30babb48..946b29e250f 100644 --- a/src/mesa/tnl/t_pipeline.c +++ b/src/mesa/tnl/t_pipeline.c @@ -28,7 +28,6 @@ #include "main/glheader.h" #include "main/context.h" #include "main/imports.h" -#include "main/state.h" #include "main/mtypes.h" #include "t_context.h" diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c index 99b67874556..13b84a7d77b 100644 --- a/src/mesa/tnl/t_rasterpos.c +++ b/src/mesa/tnl/t_rasterpos.c @@ -29,7 +29,6 @@ #include "main/feedback.h" #include "main/light.h" #include "main/macros.h" -#include "main/rastpos.h" #include "main/simple_list.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index 15a8a67b91c..5396548666f 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -40,7 +40,6 @@ #include "shader/prog_statevars.h" #include "shader/prog_execute.h" #include "swrast/s_context.h" -#include "swrast/s_texfilter.h" #include "tnl/tnl.h" #include "tnl/t_context.h" diff --git a/src/mesa/vbo/vbo_exec.c b/src/mesa/vbo/vbo_exec.c index e168a89ea5e..a057befed0a 100644 --- a/src/mesa/vbo/vbo_exec.c +++ b/src/mesa/vbo/vbo_exec.c @@ -28,9 +28,6 @@ #include "main/api_arrayelt.h" #include "main/glheader.h" -#include "main/imports.h" -#include "main/context.h" -#include "main/macros.h" #include "main/mtypes.h" #include "main/vtxfmt.h" diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 6de8f059b79..2c82f7c9c5c 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -35,7 +35,6 @@ #include "main/bufferobj.h" #include "main/enums.h" #include "main/macros.h" -#include "glapi/dispatch.h" #include "vbo_context.h" diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 4f43856016c..d7dbbceb1b7 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -30,7 +30,6 @@ #include "main/context.h" #include "main/enums.h" #include "main/state.h" -#include "main/macros.h" #include 
"vbo_context.h" diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c index 9757c3d9f61..10f705cf844 100644 --- a/src/mesa/vbo/vbo_save.c +++ b/src/mesa/vbo/vbo_save.c @@ -28,8 +28,6 @@ #include "main/mtypes.h" #include "main/bufferobj.h" -#include "main/dlist.h" -#include "main/vtxfmt.h" #include "main/imports.h" #include "vbo_context.h" diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c index b7a74e4535a..f13a16e3b51 100644 --- a/src/mesa/vbo/vbo_save_loopback.c +++ b/src/mesa/vbo/vbo_save_loopback.c @@ -29,7 +29,6 @@ #include "main/glheader.h" #include "main/enums.h" #include "main/imports.h" -#include "main/macros.h" #include "main/mtypes.h" #include "glapi/dispatch.h" #include "glapi/glapi.h" diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c index c45190b9dd3..2ca111217ce 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -34,7 +34,6 @@ #include "main/imports.h" #include "main/image.h" #include "main/macros.h" -#include "main/enums.h" #include "main/mtypes.h" #include "vbo_split.h" @@ -221,8 +220,6 @@ begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag ) { struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr]; -/* _mesa_printf("begin %s (%d)\n", _mesa_lookup_prim_by_nr(mode), begin_flag); */ - prim->mode = mode; prim->begin = begin_flag; } diff --git a/src/mesa/x86/x86_xform.c b/src/mesa/x86/x86_xform.c index 52f6b25d817..c834e2b468b 100644 --- a/src/mesa/x86/x86_xform.c +++ b/src/mesa/x86/x86_xform.c @@ -30,7 +30,6 @@ #include "main/glheader.h" #include "main/context.h" #include "math/m_xform.h" -#include "tnl/t_context.h" #include "x86_xform.h" #include "common_x86_asm.h" |