diff options
82 files changed, 2534 insertions, 1001 deletions
diff --git a/docs/egl.html b/docs/egl.html index 57b1d1488a8..30cbe0eaedd 100644 --- a/docs/egl.html +++ b/docs/egl.html @@ -126,10 +126,21 @@ test your build. For example,</p> runtime</p> <ul> +<li><code>EGL_DRIVERS_PATH</code> + +<p>By default, the main library will look for drivers in the directory where +the drivers are installed. This variable specifies a list of +colon-separated directories where the main library will look for drivers, in +addition to the default directory. This variable is ignored for setuid/setgid +binaries.</p> + +</li> + <li><code>EGL_DRIVER</code> -<p>This variable forces the specified EGL driver to be loaded. It comes in -handy when one wants to test a specific driver.</p> +<p>This variable specifies a full path to an EGL driver and it forces the +specified EGL driver to be loaded. It comes in handy when one wants to test a +specific driver. This variable is ignored for setuid/setgid binaries.</p> </li> diff --git a/docs/envvars.html b/docs/envvars.html index bb1c914cc73..fd1700a02f1 100644 --- a/docs/envvars.html +++ b/docs/envvars.html @@ -69,6 +69,10 @@ These environment variables are for the Radeon R300 driver: <li>R300_NO_TCL - if set, disable hardware-accelerated Transform/Clip/Lighting. </ul> +<p> +Mesa EGL supports different sets of environment variables. See the +<a href="egl.html">Mesa EGL</a> page for the details. 
+</p> </BODY> </HTML> diff --git a/progs/es2/xegl/tri.c b/progs/es2/xegl/tri.c index 7729a099578..8981d8a7e21 100644 --- a/progs/es2/xegl/tri.c +++ b/progs/es2/xegl/tri.c @@ -334,14 +334,15 @@ make_x_window(Display *x_dpy, EGLDisplay egl_dpy, exit(1); } + /* sanity checks */ { EGLint val; eglQuerySurface(egl_dpy, *surfRet, EGL_WIDTH, &val); assert(val == width); eglQuerySurface(egl_dpy, *surfRet, EGL_HEIGHT, &val); assert(val == height); - eglQuerySurface(egl_dpy, *surfRet, EGL_SURFACE_TYPE, &val); - assert(val == EGL_WINDOW_BIT); + assert(eglGetConfigAttrib(egl_dpy, config, EGL_SURFACE_TYPE, &val)); + assert(val & EGL_WINDOW_BIT); } XFree(visInfo); diff --git a/progs/fpglsl/.gitignore b/progs/fpglsl/.gitignore new file mode 100644 index 00000000000..9fe73ab0678 --- /dev/null +++ b/progs/fpglsl/.gitignore @@ -0,0 +1 @@ +fp-tri diff --git a/progs/fpglsl/Makefile b/progs/fpglsl/Makefile new file mode 100644 index 00000000000..3bf14b4b709 --- /dev/null +++ b/progs/fpglsl/Makefile @@ -0,0 +1,52 @@ +# progs/tests/Makefile + + +# These programs aren't intended to be included with the normal distro. +# They're not too interesting but they're good for testing. + +TOP = ../.. +include $(TOP)/configs/current + +LIBS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLEW_LIB) -l$(GLU_LIB) -l$(GL_LIB) $(APP_LIB_DEPS) + +SOURCES = \ + fp-tri.c + + + +PROGS = $(SOURCES:%.c=%) + +INCLUDES = -I. -I$(TOP)/include -I../samples + + +##### RULES ##### + +.SUFFIXES: +.SUFFIXES: .c + +.c: + $(CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@ + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: $(PROGS) + +clean: + rm -f $(PROGS) + rm -f *.o + rm -f getproclist.h + + + + + +# Emacs tags +tags: + etags `find . 
-name \*.[ch]` `find ../include` diff --git a/progs/fpglsl/SConscript b/progs/fpglsl/SConscript new file mode 100644 index 00000000000..e31fa320238 --- /dev/null +++ b/progs/fpglsl/SConscript @@ -0,0 +1,13 @@ +Import('env') + +if not env['GLUT']: + Return() + +env = env.Clone() + +env.Prepend(LIBS = ['$GLUT_LIB']) + +env.Program( + target = 'fp-tri', + source = ['fp-tri.c'], + ) diff --git a/progs/fpglsl/fp-tri.c b/progs/fpglsl/fp-tri.c new file mode 100644 index 00000000000..c9b08fbbad7 --- /dev/null +++ b/progs/fpglsl/fp-tri.c @@ -0,0 +1,415 @@ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#ifndef WIN32 +#include <unistd.h> +#include <signal.h> +#endif + +#include <GL/glew.h> +#include <GL/glut.h> + +#include "readtex.c" + + +#define TEXTURE_FILE "../images/bw.rgb" + +unsigned show_fps = 0; +unsigned int frame_cnt = 0; +void alarmhandler(int); +static const char *filename = NULL; + +static GLuint fragShader; +static GLuint vertShader; +static GLuint program; + + +static void usage(char *name) +{ + fprintf(stderr, "usage: %s [ options ] shader_filename\n", name); +#ifndef WIN32 + fprintf(stderr, "\n" ); + fprintf(stderr, "options:\n"); + fprintf(stderr, " -fps show frames per second\n"); +#endif +} + +#ifndef WIN32 +void alarmhandler (int sig) +{ + if (sig == SIGALRM) { + printf("%d frames in 5.0 seconds = %.3f FPS\n", frame_cnt, + frame_cnt / 5.0); + + frame_cnt = 0; + } + signal(SIGALRM, alarmhandler); + alarm(5); +} +#endif + + + + +static void load_and_compile_shader(GLuint shader, const char *text) +{ + GLint stat; + + glShaderSource(shader, 1, (const GLchar **) &text, NULL); + + glCompileShader(shader); + + glGetShaderiv(shader, GL_COMPILE_STATUS, &stat); + if (!stat) { + GLchar log[1000]; + GLsizei len; + glGetShaderInfoLog(shader, 1000, &len, log); + fprintf(stderr, "fp-tri: problem compiling shader:\n%s\n", log); + exit(1); + } +} + +static void read_shader(GLuint shader, const char *filename) +{ + const int max = 100*1000; + int n; + 
char *buffer = (char*) malloc(max); + FILE *f = fopen(filename, "r"); + if (!f) { + fprintf(stderr, "fp-tri: Unable to open shader file %s\n", filename); + exit(1); + } + + n = fread(buffer, 1, max - 1, f); /* read at most max-1 bytes so buffer[n] below stays inside the allocation */ + printf("fp-tri: read %d bytes from shader file %s\n", n, filename); + if (n > 0) { + buffer[n] = 0; + load_and_compile_shader(shader, buffer); + } + + fclose(f); + free(buffer); +} + +static void check_link(GLuint prog) +{ + GLint stat; + glGetProgramiv(prog, GL_LINK_STATUS, &stat); + if (!stat) { + GLchar log[1000]; + GLsizei len; + glGetProgramInfoLog(prog, 1000, &len, log); + fprintf(stderr, "Linker error:\n%s\n", log); + } +} + +static void setup_uniforms() +{ + { + GLint loc1f = glGetUniformLocationARB(program, "Offset1f"); + GLint loc2f = glGetUniformLocationARB(program, "Offset2f"); + GLint loc4f = glGetUniformLocationARB(program, "Offset4f"); + GLfloat vecKer[] = + { 1.0, 0.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, + 1.0, 0.0, 0.0, 1.0, + 0.0, 0.0, 0.0, 1.0 + }; + if (loc1f >= 0) + glUniform1fv(loc1f, 16, vecKer); + + if (loc2f >= 0) + glUniform2fv(loc2f, 8, vecKer); + + if (loc4f >= 0) + glUniform4fv(loc4f, 4, vecKer); + + } + + { + GLint loc1f = glGetUniformLocationARB(program, "KernelValue1f"); + GLint loc2f = glGetUniformLocationARB(program, "KernelValue2f"); + GLint loc4f = glGetUniformLocationARB(program, "KernelValue4f"); + GLfloat vecKer[] = + { 1.0, 0.0, 0.0, 0.25, + 0.0, 1.0, 0.0, 0.25, + 0.0, 0.0, 1.0, 0.25, + 0.0, 0.0, 0.0, 0.25, + 0.5, 0.0, 0.0, 0.35, + 0.0, 0.5, 0.0, 0.35, + 0.0, 0.0, 0.5, 0.35, + 0.0, 0.0, 0.0, 0.35 + }; + if (loc1f >= 0) + glUniform1fv(loc1f, 16, vecKer); + + if (loc2f >= 0) + glUniform2fv(loc2f, 8, vecKer); + + if (loc4f >= 0) + glUniform4fv(loc4f, 4, vecKer); + } +} + +static void prepare_shaders() +{ + static const char *fragShaderText = + "void main() {\n" + " gl_FragColor = gl_Color;\n" + "}\n"; + static const char *vertShaderText = + "void main() {\n" + " gl_FrontColor = gl_Color;\n" + " gl_Position = 
gl_ModelViewProjectionMatrix * gl_Vertex;\n" + "}\n"; + fragShader = glCreateShader(GL_FRAGMENT_SHADER); + if (filename) + read_shader(fragShader, filename); + else + load_and_compile_shader(fragShader, fragShaderText); + + + vertShader = glCreateShader(GL_VERTEX_SHADER); + load_and_compile_shader(vertShader, vertShaderText); + + program = glCreateProgram(); + glAttachShader(program, fragShader); + glAttachShader(program, vertShader); + glLinkProgram(program); + check_link(program); + glUseProgram(program); + + setup_uniforms(); +} + +#define LEVELS 8 +#define SIZE (1<<LEVELS) +static int TexWidth = SIZE, TexHeight = SIZE; + + +static void +ResetTextureLevel( int i ) +{ + GLubyte tex2d[SIZE*SIZE][4]; + + { + GLint Width = TexWidth / (1 << i); + GLint Height = TexHeight / (1 << i); + GLint s, t; + + for (s = 0; s < Width; s++) { + for (t = 0; t < Height; t++) { + tex2d[t*Width+s][0] = ((s / 16) % 2) ? 0 : 255; + tex2d[t*Width+s][1] = ((t / 16) % 2) ? 0 : 255; + tex2d[t*Width+s][2] = 128; + tex2d[t*Width+s][3] = 255; + } + } + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + glTexImage2D(GL_TEXTURE_2D, i, GL_RGB, Width, Height, 0, + GL_RGBA, GL_UNSIGNED_BYTE, tex2d); + } +} + + +static void +ResetTexture( void ) +{ + int i; + + for (i = 0; i <= LEVELS; i++) + { + ResetTextureLevel(i); + } +} + +static void Init( void ) +{ + GLuint Texture; + + /* Load texture */ + glGenTextures(1, &Texture); + glBindTexture(GL_TEXTURE_2D, Texture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + if (!LoadRGBMipmaps(TEXTURE_FILE, GL_RGB)) { + printf("Error: couldn't load texture image file %s\n", TEXTURE_FILE); + exit(1); + } + + + glGenTextures(1, &Texture); + glActiveTextureARB(GL_TEXTURE0_ARB + 1); + glBindTexture(GL_TEXTURE_2D, Texture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + { + GLubyte data[32][32]; + int width = 32; + int height = 32; + int i; + int j; + + for (i = 0; i < 32; i++) + for (j = 0; j < 32; j++) + { + /** + ** +-----------+ + ** | W | + ** | +-----+ | + ** | | | | + ** | | B | | + ** | | | | + ** | +-----+ | + ** | | + ** +-----------+ + **/ + int i2 = i - height / 2; + int j2 = j - width / 2; + int h8 = height / 8; + int w8 = width / 8; + if ( -h8 <= i2 && i2 <= h8 && -w8 <= j2 && j2 <= w8 ) { + data[i][j] = 0x00; + } else if ( -2 * h8 <= i2 && i2 <= 2 * h8 && -2 * w8 <= j2 && j2 <= 2 * w8 ) { + data[i][j] = 0x55; + } else if ( -3 * h8 <= i2 && i2 <= 3 * h8 && -3 * w8 <= j2 && j2 <= 3 * w8 ) { + data[i][j] = 0xaa; + } else { + data[i][j] = 0xff; + } + } + + glTexImage2D( GL_TEXTURE_2D, 0, + GL_ALPHA8, + 32, 32, 0, + GL_ALPHA, GL_UNSIGNED_BYTE, data ); + } + + glGenTextures(1, &Texture); + glActiveTextureARB(GL_TEXTURE0_ARB + 2); + glBindTexture(GL_TEXTURE_2D, Texture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + ResetTexture(); + + glClearColor(.1, .3, .5, 0); +} + + + + +static void args(int argc, char *argv[]) +{ + GLint i; + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "-fps") == 0) { + show_fps = 1; + } + else if (i == argc - 1) { + filename = argv[i]; + } + else { + usage(argv[0]); + exit(1); + } + } +} + + + + + +static void Reshape(int width, int height) +{ + + glViewport(0, 0, (GLint)width, (GLint)height); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(-1.0, 1.0, -1.0, 1.0, -0.5, 1000.0); + glMatrixMode(GL_MODELVIEW); +} + +static void CleanUp(void) +{ + glDeleteShader(fragShader); + glDeleteShader(vertShader); + glDeleteProgram(program); +} + +static void Key(unsigned char key, int x, int y) +{ + + switch (key) { + case 27: + CleanUp(); + exit(1); + default: + 
break; + } + + glutPostRedisplay(); +} + +static void Display(void) +{ + glClear(GL_COLOR_BUFFER_BIT); + + glUseProgram(program); + glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, 0, 1.0, 1.0, 0.0, 0.0); + glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, 1, 0.0, 0.0, 1.0, 1.0); + glBegin(GL_TRIANGLES); + + glColor3f(0,0,1); + glTexCoord3f(1,1,0); + glVertex3f( 0.9, -0.9, -30.0); + + glColor3f(1,0,0); + glTexCoord3f(1,-1,0); + glVertex3f( 0.9, 0.9, -30.0); + + glColor3f(0,1,0); + glTexCoord3f(-1,0,0); + glVertex3f(-0.9, 0.0, -30.0); + glEnd(); + + glFlush(); + if (show_fps) { + ++frame_cnt; + glutPostRedisplay(); + } +} + + +int main(int argc, char **argv) +{ + glutInit(&argc, argv); + glutInitWindowPosition(0, 0); + glutInitWindowSize(250, 250); + glutInitDisplayMode(GLUT_RGB | GLUT_SINGLE | GLUT_DEPTH); + args(argc, argv); + glutCreateWindow(filename ? filename : "fp-tri"); + glewInit(); + glutReshapeFunc(Reshape); + glutKeyboardFunc(Key); + glutDisplayFunc(Display); + prepare_shaders(); + Init(); +#ifndef WIN32 + if (show_fps) { + signal(SIGALRM, alarmhandler); + alarm(5); + } +#endif + glutMainLoop(); + return 0; +} diff --git a/progs/fpglsl/mov-imm.glsl b/progs/fpglsl/mov-imm.glsl new file mode 100644 index 00000000000..cbb75ce342c --- /dev/null +++ b/progs/fpglsl/mov-imm.glsl @@ -0,0 +1,3 @@ +void main() { + gl_FragColor = vec4(1,0,1,1); +} diff --git a/progs/fpglsl/mov.glsl b/progs/fpglsl/mov.glsl new file mode 100644 index 00000000000..4a1f185ba44 --- /dev/null +++ b/progs/fpglsl/mov.glsl @@ -0,0 +1,3 @@ +void main() { + gl_FragColor = gl_Color; +} diff --git a/progs/fpglsl/tex-multi.glsl b/progs/fpglsl/tex-multi.glsl new file mode 100644 index 00000000000..5220b7efaf2 --- /dev/null +++ b/progs/fpglsl/tex-multi.glsl @@ -0,0 +1,15 @@ +// Multi-texture fragment shader +// Brian Paul + +// Composite second texture over first. +// We're assuming the 2nd texture has a meaningful alpha channel. 
+ +uniform sampler2D tex1; +uniform sampler2D tex2; + +void main() +{ + vec4 t1 = texture2D(tex1, gl_Color.xy); + vec4 t2 = texture2D(tex2, gl_Color.yz); + gl_FragColor = mix(t1, t2, t2.w); +} diff --git a/progs/fpglsl/tex.glsl b/progs/fpglsl/tex.glsl new file mode 100644 index 00000000000..4302fabe2d5 --- /dev/null +++ b/progs/fpglsl/tex.glsl @@ -0,0 +1,6 @@ +uniform sampler2D tex1; + +void main() +{ + gl_FragColor = texture2D(tex1, gl_Color.xy); +} diff --git a/progs/tests/Makefile b/progs/tests/Makefile index 836396b2499..a38f411def8 100644 --- a/progs/tests/Makefile +++ b/progs/tests/Makefile @@ -48,6 +48,7 @@ SOURCES = \ floattex.c \ fbotest1.c \ fbotest2.c \ + fbotest3.c \ fillrate.c \ fog.c \ fogcoord.c \ diff --git a/progs/tests/SConscript b/progs/tests/SConscript index e2c65382887..0a11b965f79 100644 --- a/progs/tests/SConscript +++ b/progs/tests/SConscript @@ -51,6 +51,7 @@ progs = [ 'ext422square', 'fbotest1', 'fbotest2', + 'fbotest3', 'fillrate', 'floattex', 'fog', diff --git a/progs/tests/fbotest3.c b/progs/tests/fbotest3.c new file mode 100644 index 00000000000..8e288b38b83 --- /dev/null +++ b/progs/tests/fbotest3.c @@ -0,0 +1,231 @@ +/* + * Test GL_EXT_framebuffer_object + * Like fbotest2.c but use a texture for the Z buffer / renderbuffer. + * Note: the Z texture is never resized so that limits what can be + * rendered if the window is resized. + * + * This tests a bug reported by Christoph Bumiller on 1 Feb 2010 + * on mesa3d-dev. + * + * XXX this should be made into a piglit test. 
+ * + * Brian Paul + * 1 Feb 2010 + */ + + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <GL/glew.h> +#include <GL/glut.h> + + +static int Win = 0; +static int Width = 400, Height = 400; +static GLuint Tex = 0; +static GLuint MyFB, ColorRb, DepthRb; +static GLboolean Animate = GL_FALSE; +static GLfloat Rotation = 0.0; + + +static void +CheckError(int line) +{ + GLenum err = glGetError(); + if (err) { + printf("fbotest3: GL Error 0x%x at line %d\n", (int) err, line); + } +} + + +static void +Display( void ) +{ + GLubyte *buffer = malloc(Width * Height * 4); + GLenum status; + + CheckError(__LINE__); + + /* draw to user framebuffer */ + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB); + glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT); + glReadBuffer(GL_COLOR_ATTACHMENT1_EXT); + + status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT); + if (status != GL_FRAMEBUFFER_COMPLETE_EXT) { + printf("fbotest3: Error: Framebuffer is incomplete!!!\n"); + } + + CheckError(__LINE__); + + glClearColor(0.5, 0.5, 1.0, 0.0); + glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); + + glEnable(GL_DEPTH_TEST); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + + glPushMatrix(); + glRotatef(30.0, 1, 0, 0); + glRotatef(Rotation, 0, 1, 0); + glutSolidTeapot(2.0); + glPopMatrix(); + + /* read from user framebuffer */ + glReadPixels(0, 0, Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, buffer); + + /* draw to window */ + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); + glDisable(GL_DEPTH_TEST); /* in case window has depth buffer */ + glWindowPos2iARB(0, 0); + glDrawPixels(Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, buffer); + + free(buffer); + glutSwapBuffers(); + CheckError(__LINE__); +} + + +static void +Reshape( int width, int height ) +{ + float ar = (float) width / (float) height; + + glViewport( 0, 0, width, height ); + glMatrixMode( GL_PROJECTION ); + glLoadIdentity(); + glFrustum( -ar, ar, -1.0, 1.0, 5.0, 25.0 ); + + glMatrixMode( GL_MODELVIEW ); + 
glLoadIdentity(); + glTranslatef( 0.0, 0.0, -15.0 ); + + glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, ColorRb); + glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, width, height); + + Width = width; + Height = height; +} + + +static void +CleanUp(void) +{ + glDeleteFramebuffersEXT(1, &MyFB); + glDeleteRenderbuffersEXT(1, &ColorRb); + glDeleteRenderbuffersEXT(1, &DepthRb); + glDeleteTextures(1, &Tex); + assert(!glIsFramebufferEXT(MyFB)); + assert(!glIsRenderbufferEXT(ColorRb)); + assert(!glIsRenderbufferEXT(DepthRb)); + glutDestroyWindow(Win); + exit(0); +} + + +static void +Idle(void) +{ + Rotation = glutGet(GLUT_ELAPSED_TIME) * 0.1; + glutPostRedisplay(); +} + + +static void +Key( unsigned char key, int x, int y ) +{ + (void) x; + (void) y; + switch (key) { + case 'a': + Animate = !Animate; + if (Animate) + glutIdleFunc(Idle); + else + glutIdleFunc(NULL); + break; + case 27: + CleanUp(); + break; + } + glutPostRedisplay(); +} + + +static void +Init( void ) +{ + if (!glutExtensionSupported("GL_EXT_framebuffer_object")) { + printf("fbotest3: GL_EXT_framebuffer_object not found!\n"); + exit(0); + } + printf("fbotest3: GL_RENDERER = %s\n", (char *) glGetString(GL_RENDERER)); + + /* create initial tex obj as an RGBA texture */ + glGenTextures(1, &Tex); + glBindTexture(GL_TEXTURE_2D, Tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 256, 256, 0, + GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glEnable(GL_TEXTURE_2D); + + /* draw something to make sure the texture is used */ + glBegin(GL_POINTS); + glVertex2f(0, 0); + glEnd(); + + /* done w/ texturing */ + glDisable(GL_TEXTURE_2D); + + /* Create my Framebuffer Object */ + glGenFramebuffersEXT(1, &MyFB); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB); + assert(glIsFramebufferEXT(MyFB)); + + /* Setup color renderbuffer */ + glGenRenderbuffersEXT(1, &ColorRb); + glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, 
ColorRb); + assert(glIsRenderbufferEXT(ColorRb)); + glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, + GL_RENDERBUFFER_EXT, ColorRb); + glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height); + + /* Setup depth renderbuffer (a texture) */ + glGenRenderbuffersEXT(1, &DepthRb); + glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, DepthRb); + assert(glIsRenderbufferEXT(DepthRb)); + /* replace RGBA texture with Z texture */ + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, Width, Height, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, + GL_TEXTURE_2D, Tex, 0); + + CheckError(__LINE__); + + /* restore to default */ + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); + CheckError(__LINE__); +} + + +int +main( int argc, char *argv[] ) +{ + glutInit( &argc, argv ); + glutInitWindowPosition( 0, 0 ); + glutInitWindowSize(Width, Height); + glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); + Win = glutCreateWindow(argv[0]); + glewInit(); + glutReshapeFunc( Reshape ); + glutKeyboardFunc( Key ); + glutDisplayFunc( Display ); + if (Animate) + glutIdleFunc(Idle); + Init(); + glutMainLoop(); + return 0; +} diff --git a/src/egl/main/egldriver.c b/src/egl/main/egldriver.c index 1dadbf783b6..a8a8e302e48 100644 --- a/src/egl/main/egldriver.c +++ b/src/egl/main/egldriver.c @@ -25,6 +25,7 @@ #include <dlfcn.h> #include <sys/types.h> #include <dirent.h> +#include <unistd.h> #endif @@ -55,21 +56,7 @@ close_library(HMODULE lib) static const char * library_suffix(void) { - return "dll"; -} - - -static EGLBoolean -make_library_path(char *buf, unsigned int size, const char *name) -{ - EGLBoolean need_suffix; - const char *suffix = ".dll"; - int ret; - - need_suffix = (strchr(name, '.') == NULL); - ret = snprintf(buf, size, "%s%s", name, (need_suffix) ? 
suffix : ""); - - return ((unsigned int) ret < size); + return ".dll"; } @@ -96,30 +83,13 @@ close_library(void *lib) static const char * library_suffix(void) { - return "so"; -} - - -static EGLBoolean -make_library_path(char *buf, unsigned int size, const char *name) -{ - EGLBoolean need_dir, need_suffix; - const char *suffix = ".so"; - int ret; - - need_dir = (strchr(name, '/') == NULL); - need_suffix = (strchr(name, '.') == NULL); - - ret = snprintf(buf, size, "%s%s%s", - (need_dir) ? _EGL_DRIVER_SEARCH_DIR"/" : "", name, - (need_suffix) ? suffix : ""); - - return ((unsigned int) ret < size); + return ".so"; } #else /* _EGL_PLATFORM_NO_OS */ + static const char DefaultDriverName[] = "builtin"; typedef void *lib_handle; @@ -143,14 +113,6 @@ library_suffix(void) } -static EGLBoolean -make_library_path(char *buf, unsigned int size, const char *name) -{ - int ret = snprintf(buf, size, name); - return ((unsigned int) ret < size); -} - - #endif @@ -299,122 +261,260 @@ _eglMatchDriver(_EGLDisplay *dpy) /** - * Preload a user driver. - * - * A user driver can be specified by EGL_DRIVER. + * A loader function for use with _eglPreloadForEach. The loader data is the + * filename of the driver. This function stops on the first valid driver. 
*/ static EGLBoolean -_eglPreloadUserDriver(void) +_eglLoaderFile(const char *dir, size_t len, void *loader_data) { -#if defined(_EGL_PLATFORM_POSIX) || defined(_EGL_PLATFORM_WINDOWS) _EGLDriver *drv; char path[1024]; - char *env; - - env = getenv("EGL_DRIVER"); - if (!env) - return EGL_FALSE; + const char *filename = (const char *) loader_data; + size_t flen = strlen(filename); - if (!make_library_path(path, sizeof(path), env)) - return EGL_FALSE; + /* make a full path */ + if (len + flen + 2 > sizeof(path)) + return EGL_TRUE; + if (len) { + memcpy(path, dir, len); + path[len++] = '/'; + } + memcpy(path + len, filename, flen); + len += flen; + path[len] = '\0'; drv = _eglLoadDriver(path, NULL); - if (!drv) { - _eglLog(_EGL_WARNING, "EGL_DRIVER is set to an invalid driver"); - return EGL_FALSE; + /* fix the path and load again */ + if (!drv && library_suffix()) { + const char *suffix = library_suffix(); + size_t slen = strlen(suffix); + const char *p; + EGLBoolean need_suffix; + + p = filename + flen - slen; + need_suffix = (p < filename || strcmp(p, suffix) != 0); + if (need_suffix && len + slen + 1 <= sizeof(path)) { + strcpy(path + len, suffix); + drv = _eglLoadDriver(path, NULL); + } } + if (!drv) + return EGL_TRUE; + /* remember the driver and stop */ _eglGlobal.Drivers[_eglGlobal.NumDrivers++] = drv; - - return EGL_TRUE; -#else /* _EGL_PLATFORM_POSIX || _EGL_PLATFORM_WINDOWS */ return EGL_FALSE; -#endif } /** - * Preload display drivers. - * - * Display drivers are a set of drivers that support a certain display system. - * The display system may be specified by EGL_DISPLAY. - * - * FIXME This makes libEGL a memory hog if an user driver is not specified and - * there are many display drivers. + * A loader function for use with _eglPreloadForEach. The loader data is the + * pattern (prefix) of the files to look for. 
*/ static EGLBoolean -_eglPreloadDisplayDrivers(void) +_eglLoaderPattern(const char *dir, size_t len, void *loader_data) { #if defined(_EGL_PLATFORM_POSIX) - const char *dpy, *suffix; - char path[1024], prefix[32]; + const char *prefix, *suffix; + size_t prefix_len, suffix_len; DIR *dirp; struct dirent *dirent; + char path[1024]; - dpy = getenv("EGL_DISPLAY"); - if (!dpy || !dpy[0]) - dpy = _EGL_DEFAULT_DISPLAY; - if (!dpy || !dpy[0]) - return EGL_FALSE; - - snprintf(prefix, sizeof(prefix), "egl_%s_", dpy); - suffix = library_suffix(); + if (len + 2 > sizeof(path)) + return EGL_TRUE; + if (len) { + memcpy(path, dir, len); + path[len++] = '/'; + } + path[len] = '\0'; - dirp = opendir(_EGL_DRIVER_SEARCH_DIR); + dirp = opendir(path); if (!dirp) - return EGL_FALSE; + return EGL_TRUE; + + prefix = (const char *) loader_data; + prefix_len = strlen(prefix); + suffix = library_suffix(); + suffix_len = (suffix) ? strlen(suffix) : 0; while ((dirent = readdir(dirp))) { _EGLDriver *drv; + size_t dirent_len = strlen(dirent->d_name); const char *p; /* match the prefix */ - if (strncmp(dirent->d_name, prefix, strlen(prefix)) != 0) + if (strncmp(dirent->d_name, prefix, prefix_len) != 0) continue; - /* match the suffix */ - p = strrchr(dirent->d_name, '.'); - if ((p && !suffix) || (!p && suffix)) - continue; - else if (p && suffix && strcmp(p + 1, suffix) != 0) - continue; - - snprintf(path, sizeof(path), - _EGL_DRIVER_SEARCH_DIR"/%s", dirent->d_name); + if (suffix) { + p = dirent->d_name + dirent_len - suffix_len; + if (p < dirent->d_name || strcmp(p, suffix) != 0) + continue; + } - drv = _eglLoadDriver(path, NULL); - if (drv) - _eglGlobal.Drivers[_eglGlobal.NumDrivers++] = drv; + /* make a full path and load the driver */ + if (len + dirent_len + 1 <= sizeof(path)) { + strcpy(path + len, dirent->d_name); + drv = _eglLoadDriver(path, NULL); + if (drv) + _eglGlobal.Drivers[_eglGlobal.NumDrivers++] = drv; + } } closedir(dirp); - return (_eglGlobal.NumDrivers > 0); + return EGL_TRUE; 
#else /* _EGL_PLATFORM_POSIX */ + /* stop immediately */ return EGL_FALSE; #endif } /** - * Preload the default driver. + * Run the preload function on each driver directory and return the number of + * drivers loaded. Each directory is passed to the loader with loader_data. + * + * The process may end prematurely if the callback function returns false. + */ +static EGLint +_eglPreloadForEach(const char *search_path, + EGLBoolean (*loader)(const char *, size_t, void *), + void *loader_data) +{ + const char *cur, *next; + size_t len; + EGLint num_drivers = _eglGlobal.NumDrivers; + + cur = search_path; + while (cur) { + next = strchr(cur, ':'); + len = (next) ? next - cur : strlen(cur); + + if (!loader(cur, len, loader_data)) /* forward the caller's data, not the callback pointer */ + break; + + cur = (next) ? next + 1 : NULL; + } + + return (_eglGlobal.NumDrivers - num_drivers); +} + + +/** + * Return a list of colon-separated driver directories. + */ +static const char * +_eglGetSearchPath(void) +{ + static const char *search_path; + +#if defined(_EGL_PLATFORM_POSIX) || defined(_EGL_PLATFORM_WINDOWS) + if (!search_path) { + static char buffer[1024]; + const char *p; + int ret; + + p = getenv("EGL_DRIVERS_PATH"); +#if defined(_EGL_PLATFORM_POSIX) + if (p && (geteuid() != getuid() || getegid() != getgid())) { + _eglLog(_EGL_DEBUG, + "ignore EGL_DRIVERS_PATH for setuid/setgid binaries"); + p = NULL; + } +#endif /* _EGL_PLATFORM_POSIX */ + + if (p) { + ret = snprintf(buffer, sizeof(buffer), + "%s:%s", p, _EGL_DRIVER_SEARCH_DIR); + if (ret > 0 && ret < sizeof(buffer)) + search_path = buffer; + } + } + if (!search_path) + search_path = _EGL_DRIVER_SEARCH_DIR; +#else + search_path = ""; +#endif + + return search_path; +} + + +/** + * Preload a user driver. + * + * A user driver can be specified by EGL_DRIVER. 
*/ static EGLBoolean -_eglPreloadDefaultDriver(void) +_eglPreloadUserDriver(void) { - _EGLDriver *drv; - char path[1024]; + const char *search_path = _eglGetSearchPath(); + char *env; + + env = getenv("EGL_DRIVER"); +#if defined(_EGL_PLATFORM_POSIX) + if (env && strchr(env, '/')) { + search_path = ""; + if ((geteuid() != getuid() || getegid() != getgid())) { + _eglLog(_EGL_DEBUG, + "ignore EGL_DRIVER for setuid/setgid binaries"); + env = NULL; + } + } +#endif /* _EGL_PLATFORM_POSIX */ + if (!env) + return EGL_FALSE; - if (!make_library_path(path, sizeof(path), DefaultDriverName)) + if (!_eglPreloadForEach(search_path, _eglLoaderFile, (void *) env)) { + _eglLog(_EGL_WARNING, "EGL_DRIVER is set to an invalid driver"); return EGL_FALSE; + } - drv = _eglLoadDriver(path, NULL); - if (!drv) + return EGL_TRUE; +} + + +/** + * Preload display drivers. + * + * Display drivers are a set of drivers that support a certain display system. + * The display system may be specified by EGL_DISPLAY. + * + * FIXME This makes libEGL a memory hog if an user driver is not specified and + * there are many display drivers. + */ +static EGLBoolean +_eglPreloadDisplayDrivers(void) +{ + const char *dpy; + char prefix[32]; + int ret; + + dpy = getenv("EGL_DISPLAY"); + if (!dpy || !dpy[0]) + dpy = _EGL_DEFAULT_DISPLAY; + if (!dpy || !dpy[0]) return EGL_FALSE; - _eglGlobal.Drivers[_eglGlobal.NumDrivers++] = drv; + ret = snprintf(prefix, sizeof(prefix), "egl_%s_", dpy); + if (ret < 0 || ret >= sizeof(prefix)) + return EGL_FALSE; - return EGL_TRUE; + return (_eglPreloadForEach(_eglGetSearchPath(), + _eglLoaderPattern, (void *) prefix) > 0); +} + + +/** + * Preload the default driver. 
+ */ +static EGLBoolean +_eglPreloadDefaultDriver(void) +{ + return (_eglPreloadForEach(_eglGetSearchPath(), + _eglLoaderFile, (void *) DefaultDriverName) > 0); } diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index d3084fd4283..d5ddc4a6a92 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -352,7 +352,10 @@ draw_find_shader_output(const struct draw_context *draw, /** - * Return number of the shader outputs. + * Return total number of the shader outputs. This function is similar to + * draw_current_shader_outputs() but this function also counts any extra + * vertex/geometry output attributes that may be filled in by some draw + * stages (such as AA point, AA line). * * If geometry shader is present, its output will be returned, * if not vertex shader is used. @@ -362,8 +365,9 @@ draw_num_shader_outputs(const struct draw_context *draw) { uint count = draw->vs.vertex_shader->info.num_outputs; - /* if geometry shader is present, its outputs go to te - * driver, not the vertex shaders */ + /* If a geometry shader is present, its outputs go to the + * driver, else the vertex shader's outputs. + */ if (draw->gs.geometry_shader) count = draw->gs.geometry_shader->info.num_outputs; @@ -374,7 +378,8 @@ draw_num_shader_outputs(const struct draw_context *draw) /** - * Provide TGSI sampler objects for vertex/geometry shaders that use texture fetches. + * Provide TGSI sampler objects for vertex/geometry shaders that use + * texture fetches. * This might only be used by software drivers for the time being. */ void @@ -454,14 +459,27 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) } -int draw_current_shader_outputs(struct draw_context *draw) +/** + * Return the number of output attributes produced by the geometry + * shader, if present. If no geometry shader, return the number of + * outputs from the vertex shader. 
+ * \sa draw_num_shader_outputs + */ +uint +draw_current_shader_outputs(const struct draw_context *draw) { if (draw->gs.geometry_shader) return draw->gs.num_gs_outputs; return draw->vs.num_vs_outputs; } -int draw_current_shader_position_output(struct draw_context *draw) + +/** + * Return the index of the shader output which will contain the + * vertex position. + */ +uint +draw_current_shader_position_output(const struct draw_context *draw) { if (draw->gs.geometry_shader) return draw->gs.position_output; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index e58129b99d8..8f6ca15dfa2 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -49,6 +49,10 @@ #include "draw_pipe.h" +/** Approx number of new tokens for instructions in aa_transform_inst() */ +#define NUM_NEW_TOKENS 50 + + /** * Max texture level for the alpha texture used for antialiasing */ @@ -179,12 +183,7 @@ aa_transform_decl(struct tgsi_transform_context *ctx, static int free_bit(uint bitfield) { - int i; - for (i = 0; i < 32; i++) { - if ((bitfield & (1 << i)) == 0) - return i; - } - return -1; + return ffs(~bitfield) - 1; } @@ -343,11 +342,10 @@ generate_aaline_fs(struct aaline_stage *aaline) const struct pipe_shader_state *orig_fs = &aaline->fs->state; struct pipe_shader_state aaline_fs; struct aa_transform_context transform; - -#define MAX 1000 + const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS; aaline_fs = *orig_fs; /* copy to init */ - aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + aaline_fs.tokens = tgsi_alloc_tokens(newLen); if (aaline_fs.tokens == NULL) return FALSE; @@ -363,7 +361,7 @@ generate_aaline_fs(struct aaline_stage *aaline) tgsi_transform_shader(orig_fs->tokens, (struct tgsi_token *) aaline_fs.tokens, - MAX, &transform.base); + newLen, &transform.base); #if 0 /* DEBUG */ tgsi_dump(orig_fs->tokens, 0); diff --git 
a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index d86717e5182..97f34808793 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -53,6 +53,10 @@ #include "draw_pipe.h" +/** Approx number of new tokens for instructions in aa_transform_inst() */ +#define NUM_NEW_TOKENS 200 + + /* * Enabling NORMALIZE might give _slightly_ better results. * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or @@ -81,16 +85,19 @@ struct aapoint_stage { struct draw_stage stage; - int psize_slot; + /** half of pipe_rasterizer_state::point_size */ float radius; + /** vertex attrib slot containing point size */ + int psize_slot; + /** this is the vertex attrib slot for the new texcoords */ uint tex_slot; + + /** vertex attrib slot containing position */ uint pos_slot; - /* - * Currently bound state - */ + /** Currently bound fragment shader */ struct aapoint_fragment_shader *fs; /* @@ -491,11 +498,10 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) const struct pipe_shader_state *orig_fs = &aapoint->fs->state; struct pipe_shader_state aapoint_fs; struct aa_transform_context transform; - -#define MAX 1000 + const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS; aapoint_fs = *orig_fs; /* copy to init */ - aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + aapoint_fs.tokens = tgsi_alloc_tokens(newLen); if (aapoint_fs.tokens == NULL) return FALSE; @@ -511,7 +517,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) tgsi_transform_shader(orig_fs->tokens, (struct tgsi_token *) aapoint_fs.tokens, - MAX, &transform.base); + newLen, &transform.base); #if 0 /* DEBUG */ printf("draw_aapoint, orig shader:\n"); @@ -575,8 +581,8 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header) const struct aapoint_stage *aapoint = aapoint_stage(stage); struct prim_header tri; struct vertex_header *v[4]; - uint texPos = 
aapoint->tex_slot; - uint pos_slot = aapoint->pos_slot; + const uint tex_slot = aapoint->tex_slot; + const uint pos_slot = aapoint->pos_slot; float radius, *pos, *tex; uint i; float k; @@ -643,16 +649,16 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header) pos[1] += radius; /* new texcoords */ - tex = v[0]->data[texPos]; + tex = v[0]->data[tex_slot]; ASSIGN_4V(tex, -1, -1, k, 1); - tex = v[1]->data[texPos]; + tex = v[1]->data[tex_slot]; ASSIGN_4V(tex, 1, -1, k, 1); - tex = v[2]->data[texPos]; + tex = v[2]->data[tex_slot]; ASSIGN_4V(tex, 1, 1, k, 1); - tex = v[3]->data[texPos]; + tex = v[3]->data[tex_slot]; ASSIGN_4V(tex, -1, 1, k, 1); /* emit 2 tris for the quad strip */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index f047d8359c4..d0d99aa331a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -50,6 +50,9 @@ #include "draw_pipe.h" +/** Approx number of new tokens for instructions in pstip_transform_inst() */ +#define NUM_NEW_TOKENS 50 + /** * Subclass of pipe_shader_state to carry extra fragment shader info. 
@@ -172,12 +175,7 @@ pstip_transform_immed(struct tgsi_transform_context *ctx, static int free_bit(uint bitfield) { - int i; - for (i = 0; i < 32; i++) { - if ((bitfield & (1 << i)) == 0) - return i; - } - return -1; + return ffs(~bitfield) - 1; } @@ -333,11 +331,10 @@ generate_pstip_fs(struct pstip_stage *pstip) /*struct draw_context *draw = pstip->stage.draw;*/ struct pipe_shader_state pstip_fs; struct pstip_transform_context transform; - -#define MAX 1000 + const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS; pstip_fs = *orig_fs; /* copy to init */ - pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + pstip_fs.tokens = tgsi_alloc_tokens(newLen); if (pstip_fs.tokens == NULL) return FALSE; @@ -352,7 +349,7 @@ generate_pstip_fs(struct pstip_stage *pstip) tgsi_transform_shader(orig_fs->tokens, (struct tgsi_token *) pstip_fs.tokens, - MAX, &transform.base); + newLen, &transform.base); #if 0 /* DEBUG */ tgsi_dump(orig_fs->tokens, 0); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 6a7190e9750..69466d8749d 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -280,8 +280,8 @@ void draw_gs_destroy( struct draw_context *draw ); /******************************************************************************* * Common shading code: */ -int draw_current_shader_outputs(struct draw_context *draw); -int draw_current_shader_position_output(struct draw_context *draw); +uint draw_current_shader_outputs(const struct draw_context *draw); +uint draw_current_shader_position_output(const struct draw_context *draw); /******************************************************************************* * Vertex processing (was passthrough) code: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index fbb9aa0e63a..f7a1bb74a9d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -264,6 +264,12 @@ static void micro_rcp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { +#if 0 /* for debugging */ + assert(src->f[0] != 0.0f); + assert(src->f[1] != 0.0f); + assert(src->f[2] != 0.0f); + assert(src->f[3] != 0.0f); +#endif dst->f[0] = 1.0f / src->f[0]; dst->f[1] = 1.0f / src->f[1]; dst->f[2] = 1.0f / src->f[2]; @@ -284,6 +290,12 @@ static void micro_rsq(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { +#if 0 /* for debugging */ + assert(src->f[0] != 0.0f); + assert(src->f[1] != 0.0f); + assert(src->f[2] != 0.0f); + assert(src->f[3] != 0.0f); +#endif dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); @@ -450,12 +462,20 @@ static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; -#define CHECK_INF_OR_NAN(chan) do {\ - assert(!util_is_inf_or_nan((chan)->f[0]));\ - assert(!util_is_inf_or_nan((chan)->f[1]));\ - assert(!util_is_inf_or_nan((chan)->f[2]));\ - assert(!util_is_inf_or_nan((chan)->f[3]));\ - } while (0) +/** + * Assert that none of the float values in 'chan' are infinite or NaN. + * NaN and Inf may occur normally during program execution and should + * not lead to crashes, etc. But when debugging, it's helpful to catch + * them. 
+ */ +static INLINE void +check_inf_or_nan(const union tgsi_exec_channel *chan) +{ + assert(!util_is_inf_or_nan((chan)->f[0])); + assert(!util_is_inf_or_nan((chan)->f[1])); + assert(!util_is_inf_or_nan((chan)->f[2])); + assert(!util_is_inf_or_nan((chan)->f[3])); +} #ifdef DEBUG @@ -1219,8 +1239,9 @@ store_dest(struct tgsi_exec_machine *mach, int offset = 0; /* indirection offset */ int index; - if (dst_datatype == TGSI_EXEC_DATA_FLOAT) { - CHECK_INF_OR_NAN(chan); + /* for debugging */ + if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { + check_inf_or_nan(chan); } /* There is an extra source register that indirectly subscripts @@ -1478,7 +1499,7 @@ emit_primitive(struct tgsi_exec_machine *mach) } /* - * Fetch a four texture samples using STR texture coordinates. + * Fetch four texture samples using STR texture coordinates. */ static void fetch_texel( struct tgsi_sampler *sampler, diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index fd37fc3079b..7e19e1fe36f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -284,3 +284,14 @@ tgsi_dup_tokens(const struct tgsi_token *tokens) memcpy(new_tokens, tokens, bytes); return new_tokens; } + + +/** + * Allocate memory for num_tokens tokens. 
+ */ +struct tgsi_token * +tgsi_alloc_tokens(unsigned num_tokens) +{ + unsigned bytes = num_tokens * sizeof(struct tgsi_token); + return (struct tgsi_token *) MALLOC(bytes); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 8150e3cd29d..b45ccee2f63 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -130,6 +130,10 @@ tgsi_num_tokens(const struct tgsi_token *tokens); struct tgsi_token * tgsi_dup_tokens(const struct tgsi_token *tokens); +struct tgsi_token * +tgsi_alloc_tokens(unsigned num_tokens); + + #if defined __cplusplus } #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 60a1cb1af4f..27960bac221 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -104,6 +104,8 @@ struct ureg_program struct { unsigned index; + unsigned semantic_name; + unsigned semantic_index; } gs_input[UREG_MAX_INPUT]; unsigned nr_gs_inputs; @@ -326,10 +328,14 @@ ureg_DECL_vs_input( struct ureg_program *ureg, struct ureg_src ureg_DECL_gs_input(struct ureg_program *ureg, - unsigned index) + unsigned index, + unsigned semantic_name, + unsigned semantic_index) { if (ureg->nr_gs_inputs < UREG_MAX_INPUT) { ureg->gs_input[ureg->nr_gs_inputs].index = index; + ureg->gs_input[ureg->nr_gs_inputs].semantic_name = semantic_name; + ureg->gs_input[ureg->nr_gs_inputs].semantic_index = semantic_index; ureg->nr_gs_inputs++; } else { set_bad(ureg); @@ -1252,10 +1258,12 @@ static void emit_decls( struct ureg_program *ureg ) } } else { for (i = 0; i < ureg->nr_gs_inputs; i++) { - emit_decl_range(ureg, - TGSI_FILE_INPUT, - ureg->gs_input[i].index, - 1); + emit_decl(ureg, + TGSI_FILE_INPUT, + ureg->gs_input[i].index, + ureg->gs_input[i].semantic_name, + ureg->gs_input[i].semantic_index, + TGSI_INTERPOLATE_CONSTANT); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h 
index 6198ca34642..6be66d0694b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -161,7 +161,9 @@ ureg_DECL_vs_input( struct ureg_program *, struct ureg_src ureg_DECL_gs_input(struct ureg_program *, - unsigned index); + unsigned index, + unsigned semantic_name, + unsigned semantic_index); struct ureg_src ureg_DECL_system_value(struct ureg_program *, diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py index 9b0c86babdb..59c19ed98dd 100644 --- a/src/gallium/docs/source/conf.py +++ b/src/gallium/docs/source/conf.py @@ -16,13 +16,13 @@ import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.append(os.path.abspath('.')) +sys.path.append(os.path.abspath('exts')) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.pngmath'] +extensions = ['sphinx.ext.pngmath', 'tgsi'] # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates'] diff --git a/src/gallium/docs/source/exts/tgsi.py b/src/gallium/docs/source/exts/tgsi.py new file mode 100644 index 00000000000..e92cd5c4d1b --- /dev/null +++ b/src/gallium/docs/source/exts/tgsi.py @@ -0,0 +1,17 @@ +# tgsi.py +# Sphinx extension providing formatting for TGSI opcodes +# (c) Corbin Simpson 2010 + +import docutils.nodes +import sphinx.addnodes + +def parse_opcode(env, sig, signode): + opcode, desc = sig.split("-", 1) + opcode = opcode.strip().upper() + desc = " (%s)" % desc.strip() + signode += sphinx.addnodes.desc_name(opcode, opcode) + signode += sphinx.addnodes.desc_annotation(desc, desc) + return opcode + +def setup(app): + app.add_description_unit("opcode", "opcode", "%s (TGSI opcode)", parse_opcode) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 3e57a282fd4..55a4c6990de 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -227,7 +227,7 @@ buffer_map Map a buffer into memory. -**usage** is a bitmask of :ref:`PIPE_TEXTURE_USAGE` flags. +**usage** is a bitmask of :ref:`PIPE_BUFFER_USAGE` flags. Returns a pointer to the map, or NULL if the mapping failed. @@ -248,7 +248,7 @@ Flush a range of mapped memory into a buffer. The buffer must have been mapped with ``PIPE_BUFFER_USAGE_FLUSH_EXPLICIT``. -**usage** is a bitmask of :ref:`PIPE_TEXTURE_USAGE` flags. +**usage** is a bitmask of :ref:`PIPE_BUFFER_USAGE` flags. buffer_unmap ^^^^^^^^^^^^ diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 3e702ceeda4..5478d866678 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -6,6 +6,23 @@ for describing shaders. Since Gallium is inherently shaderful, shaders are an important part of the API. TGSI is the only intermediate representation used by all drivers. 
+Basics +------ + +All TGSI instructions, known as *opcodes*, operate on arbitrary-precision +floating-point four-component vectors. An opcode may have up to one +destination register, known as *dst*, and between zero and three source +registers, called *src0* through *src2*, or simply *src* if there is only +one. + +Some instructions, like :opcode:`I2F`, permit re-interpretation of vector +components as integers. Other instructions permit using registers as +two-component vectors with double precision; see :ref:`Double Opcodes`. + +When an instruction has a scalar result, the result is usually copied into +each of the components of *dst*. When this happens, the result is said to be +*replicated* to *dst*. :opcode:`RCP` is one such instruction. + Instruction Set --------------- @@ -13,7 +30,7 @@ From GL_NV_vertex_program ^^^^^^^^^^^^^^^^^^^^^^^^^ -ARL - Address Register Load +.. opcode:: ARL - Address Register Load .. math:: @@ -26,7 +43,7 @@ ARL - Address Register Load dst.w = \lfloor src.w\rfloor -MOV - Move +.. opcode:: MOV - Move .. math:: @@ -39,7 +56,7 @@ MOV - Move dst.w = src.w -LIT - Light Coefficients +.. opcode:: LIT - Light Coefficients .. math:: @@ -52,33 +69,25 @@ LIT - Light Coefficients dst.w = 1 -RCP - Reciprocal - -.. math:: +.. opcode:: RCP - Reciprocal - dst.x = \frac{1}{src.x} +This instruction replicates its result. - dst.y = \frac{1}{src.x} +.. math:: - dst.z = \frac{1}{src.x} + dst = \frac{1}{src.x} - dst.w = \frac{1}{src.x} +.. opcode:: RSQ - Reciprocal Square Root -RSQ - Reciprocal Square Root +This instruction replicates its result. .. math:: - dst.x = \frac{1}{\sqrt{|src.x|}} - - dst.y = \frac{1}{\sqrt{|src.x|}} - - dst.z = \frac{1}{\sqrt{|src.x|}} + dst = \frac{1}{\sqrt{|src.x|}} - dst.w = \frac{1}{\sqrt{|src.x|}} - -EXP - Approximate Exponential Base 2 +.. opcode:: EXP - Approximate Exponential Base 2 .. math:: @@ -91,7 +100,7 @@ EXP - Approximate Exponential Base 2 dst.w = 1 -LOG - Approximate Logarithm Base 2 +.. 
opcode:: LOG - Approximate Logarithm Base 2 .. math:: @@ -104,7 +113,7 @@ LOG - Approximate Logarithm Base 2 dst.w = 1 -MUL - Multiply +.. opcode:: MUL - Multiply .. math:: @@ -117,7 +126,7 @@ MUL - Multiply dst.w = src0.w \times src1.w -ADD - Add +.. opcode:: ADD - Add .. math:: @@ -130,33 +139,25 @@ ADD - Add dst.w = src0.w + src1.w -DP3 - 3-component Dot Product - -.. math:: +.. opcode:: DP3 - 3-component Dot Product - dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z +This instruction replicates its result. - dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z +.. math:: - dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z - dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z +.. opcode:: DP4 - 4-component Dot Product -DP4 - 4-component Dot Product +This instruction replicates its result. .. math:: - dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w - - dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w - dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w - dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w - - -DST - Distance Vector +.. opcode:: DST - Distance Vector .. math:: @@ -169,7 +170,7 @@ DST - Distance Vector dst.w = src1.w -MIN - Minimum +.. opcode:: MIN - Minimum .. math:: @@ -182,7 +183,7 @@ MIN - Minimum dst.w = min(src0.w, src1.w) -MAX - Maximum +.. opcode:: MAX - Maximum .. math:: @@ -195,7 +196,7 @@ MAX - Maximum dst.w = max(src0.w, src1.w) -SLT - Set On Less Than +.. opcode:: SLT - Set On Less Than .. math:: @@ -208,7 +209,7 @@ SLT - Set On Less Than dst.w = (src0.w < src1.w) ? 
1 : 0 -SGE - Set On Greater Equal Than +.. opcode:: SGE - Set On Greater Equal Than .. math:: @@ -221,7 +222,7 @@ SGE - Set On Greater Equal Than dst.w = (src0.w >= src1.w) ? 1 : 0 -MAD - Multiply And Add +.. opcode:: MAD - Multiply And Add .. math:: @@ -234,7 +235,7 @@ MAD - Multiply And Add dst.w = src0.w \times src1.w + src2.w -SUB - Subtract +.. opcode:: SUB - Subtract .. math:: @@ -247,7 +248,7 @@ SUB - Subtract dst.w = src0.w - src1.w -LRP - Linear Interpolate +.. opcode:: LRP - Linear Interpolate .. math:: @@ -260,7 +261,7 @@ LRP - Linear Interpolate dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w -CND - Condition +.. opcode:: CND - Condition .. math:: @@ -273,7 +274,7 @@ CND - Condition dst.w = (src2.w > 0.5) ? src0.w : src1.w -DP2A - 2-component Dot Product And Add +.. opcode:: DP2A - 2-component Dot Product And Add .. math:: @@ -286,7 +287,7 @@ DP2A - 2-component Dot Product And Add dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x -FRAC - Fraction +.. opcode:: FRAC - Fraction .. math:: @@ -299,7 +300,7 @@ FRAC - Fraction dst.w = src.w - \lfloor src.w\rfloor -CLAMP - Clamp +.. opcode:: CLAMP - Clamp .. math:: @@ -312,9 +313,9 @@ CLAMP - Clamp dst.w = clamp(src0.w, src1.w, src2.w) -FLR - Floor +.. opcode:: FLR - Floor -This is identical to ARL. +This is identical to :opcode:`ARL`. .. math:: @@ -327,7 +328,7 @@ This is identical to ARL. dst.w = \lfloor src.w\rfloor -ROUND - Round +.. opcode:: ROUND - Round .. math:: @@ -340,45 +341,33 @@ ROUND - Round dst.w = round(src.w) -EX2 - Exponential Base 2 +.. opcode:: EX2 - Exponential Base 2 -.. math:: +This instruction replicates its result. - dst.x = 2^{src.x} - - dst.y = 2^{src.x} +.. math:: - dst.z = 2^{src.x} + dst = 2^{src.x} - dst.w = 2^{src.x} +.. opcode:: LG2 - Logarithm Base 2 -LG2 - Logarithm Base 2 +This instruction replicates its result. .. math:: - dst.x = \log_2{src.x} - - dst.y = \log_2{src.x} - - dst.z = \log_2{src.x} + dst = \log_2{src.x} - dst.w = \log_2{src.x} +.. 
opcode:: POW - Power -POW - Power +This instruction replicates its result. .. math:: - dst.x = src0.x^{src1.x} + dst = src0.x^{src1.x} - dst.y = src0.x^{src1.x} - - dst.z = src0.x^{src1.x} - - dst.w = src0.x^{src1.x} - -XPD - Cross Product +.. opcode:: XPD - Cross Product .. math:: @@ -391,7 +380,7 @@ XPD - Cross Product dst.w = 1 -ABS - Absolute +.. opcode:: ABS - Absolute .. math:: @@ -404,48 +393,36 @@ ABS - Absolute dst.w = |src.w| -RCC - Reciprocal Clamped +.. opcode:: RCC - Reciprocal Clamped + +This instruction replicates its result. XXX cleanup on aisle three .. math:: - dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) - - dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) - - dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) - dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) +.. opcode:: DPH - Homogeneous Dot Product -DPH - Homogeneous Dot Product +This instruction replicates its result. .. math:: - dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w - dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w - dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w +.. opcode:: COS - Cosine - dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w - - -COS - Cosine +This instruction replicates its result. .. 
math:: - dst.x = \cos{src.x} - - dst.y = \cos{src.x} - - dst.z = \cos{src.x} + dst = \cos{src.x} - dst.w = \cos{src.x} - -DDX - Derivative Relative To X +.. opcode:: DDX - Derivative Relative To X .. math:: @@ -458,7 +435,7 @@ DDX - Derivative Relative To X dst.w = partialx(src.w) -DDY - Derivative Relative To Y +.. opcode:: DDY - Derivative Relative To Y .. math:: @@ -471,32 +448,32 @@ DDY - Derivative Relative To Y dst.w = partialy(src.w) -KILP - Predicated Discard +.. opcode:: KILP - Predicated Discard discard -PK2H - Pack Two 16-bit Floats +.. opcode:: PK2H - Pack Two 16-bit Floats TBD -PK2US - Pack Two Unsigned 16-bit Scalars +.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars TBD -PK4B - Pack Four Signed 8-bit Scalars +.. opcode:: PK4B - Pack Four Signed 8-bit Scalars TBD -PK4UB - Pack Four Unsigned 8-bit Scalars +.. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars TBD -RFL - Reflection Vector +.. opcode:: RFL - Reflection Vector .. math:: @@ -508,10 +485,12 @@ RFL - Reflection Vector dst.w = 1 -Considered for removal. +.. note:: + + Considered for removal. -SEQ - Set On Equal +.. opcode:: SEQ - Set On Equal .. math:: @@ -524,21 +503,20 @@ SEQ - Set On Equal dst.w = (src0.w == src1.w) ? 1 : 0 -SFL - Set On False +.. opcode:: SFL - Set On False -.. math:: +This instruction replicates its result. - dst.x = 0 +.. math:: - dst.y = 0 + dst = 0 - dst.z = 0 +.. note:: - dst.w = 0 + Considered for removal. -Considered for removal. -SGT - Set On Greater Than +.. opcode:: SGT - Set On Greater Than .. math:: @@ -551,20 +529,16 @@ SGT - Set On Greater Than dst.w = (src0.w > src1.w) ? 1 : 0 -SIN - Sine +.. opcode:: SIN - Sine -.. math:: +This instruction replicates its result. - dst.x = \sin{src.x} - - dst.y = \sin{src.x} - - dst.z = \sin{src.x} +.. math:: - dst.w = \sin{src.x} + dst = \sin{src.x} -SLE - Set On Less Equal Than +.. opcode:: SLE - Set On Less Equal Than .. math:: @@ -577,7 +551,7 @@ SLE - Set On Less Equal Than dst.w = (src0.w <= src1.w) ? 
1 : 0 -SNE - Set On Not Equal +.. opcode:: SNE - Set On Not Equal .. math:: @@ -590,59 +564,63 @@ SNE - Set On Not Equal dst.w = (src0.w != src1.w) ? 1 : 0 -STR - Set On True +.. opcode:: STR - Set On True -.. math:: +This instruction replicates its result. - dst.x = 1 - - dst.y = 1 - - dst.z = 1 +.. math:: - dst.w = 1 + dst = 1 -TEX - Texture Lookup +.. opcode:: TEX - Texture Lookup TBD -TXD - Texture Lookup with Derivatives +.. opcode:: TXD - Texture Lookup with Derivatives TBD -TXP - Projective Texture Lookup +.. opcode:: TXP - Projective Texture Lookup TBD -UP2H - Unpack Two 16-Bit Floats +.. opcode:: UP2H - Unpack Two 16-Bit Floats TBD - Considered for removal. +.. note:: -UP2US - Unpack Two Unsigned 16-Bit Scalars + Considered for removal. + +.. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars TBD - Considered for removal. +.. note:: + + Considered for removal. -UP4B - Unpack Four Signed 8-Bit Values +.. opcode:: UP4B - Unpack Four Signed 8-Bit Values TBD - Considered for removal. +.. note:: + + Considered for removal. -UP4UB - Unpack Four Unsigned 8-Bit Scalars +.. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars TBD - Considered for removal. +.. note:: -X2D - 2D Coordinate Transformation + Considered for removal. + +.. opcode:: X2D - 2D Coordinate Transformation .. math:: @@ -654,20 +632,24 @@ X2D - 2D Coordinate Transformation dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w -Considered for removal. +.. note:: + + Considered for removal. From GL_NV_vertex_program2 ^^^^^^^^^^^^^^^^^^^^^^^^^^ -ARA - Address Register Add +.. opcode:: ARA - Address Register Add TBD - Considered for removal. +.. note:: -ARR - Address Register Load With Round + Considered for removal. + +.. opcode:: ARR - Address Register Load With Round .. math:: @@ -680,26 +662,28 @@ ARR - Address Register Load With Round dst.w = round(src.w) -BRA - Branch +.. opcode:: BRA - Branch pc = target - Considered for removal. +.. note:: + + Considered for removal. 
-CAL - Subroutine Call +.. opcode:: CAL - Subroutine Call push(pc) pc = target -RET - Subroutine Call Return +.. opcode:: RET - Subroutine Call Return pc = pop() Potential restrictions: * Only occurs at end of function. -SSG - Set Sign +.. opcode:: SSG - Set Sign .. math:: @@ -712,7 +696,7 @@ SSG - Set Sign dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 -CMP - Compare +.. opcode:: CMP - Compare .. math:: @@ -725,7 +709,7 @@ CMP - Compare dst.w = (src0.w < 0) ? src1.w : src2.w -KIL - Conditional Discard +.. opcode:: KIL - Conditional Discard .. math:: @@ -734,7 +718,7 @@ KIL - Conditional Discard endif -SCS - Sine Cosine +.. opcode:: SCS - Sine Cosine .. math:: @@ -747,12 +731,12 @@ SCS - Sine Cosine dst.y = 1 -TXB - Texture Lookup With Bias +.. opcode:: TXB - Texture Lookup With Bias TBD -NRM - 3-component Vector Normalise +.. opcode:: NRM - 3-component Vector Normalise .. math:: @@ -765,7 +749,7 @@ NRM - 3-component Vector Normalise dst.w = 1 -DIV - Divide +.. opcode:: DIV - Divide .. math:: @@ -778,35 +762,31 @@ DIV - Divide dst.w = \frac{src0.w}{src1.w} -DP2 - 2-component Dot Product - -.. math:: - - dst.x = src0.x \times src1.x + src0.y \times src1.y +.. opcode:: DP2 - 2-component Dot Product - dst.y = src0.x \times src1.x + src0.y \times src1.y +This instruction replicates its result. - dst.z = src0.x \times src1.x + src0.y \times src1.y +.. math:: - dst.w = src0.x \times src1.x + src0.y \times src1.y + dst = src0.x \times src1.x + src0.y \times src1.y -TXL - Texture Lookup With LOD +.. opcode:: TXL - Texture Lookup With LOD TBD -BRK - Break +.. opcode:: BRK - Break TBD -IF - If +.. opcode:: IF - If TBD -BGNFOR - Begin a For-Loop +.. opcode:: BGNFOR - Begin a For-Loop dst.x = floor(src.x) dst.y = floor(src.y) @@ -819,25 +799,31 @@ BGNFOR - Begin a For-Loop Note: The destination must be a loop register. The source must be a constant register. - Considered for cleanup / removal. +.. note:: + Considered for cleanup. -REP - Repeat +.. 
note:: + + Considered for removal. + + +.. opcode:: REP - Repeat TBD -ELSE - Else +.. opcode:: ELSE - Else TBD -ENDIF - End If +.. opcode:: ENDIF - End If TBD -ENDFOR - End a For-Loop +.. opcode:: ENDFOR - End a For-Loop dst.x = dst.x + dst.z dst.y = dst.y - 1.0 @@ -848,30 +834,48 @@ ENDFOR - End a For-Loop Note: The destination must be a loop register. - Considered for cleanup / removal. +.. note:: + + Considered for cleanup. + +.. note:: -ENDREP - End Repeat + Considered for removal. + +.. opcode:: ENDREP - End Repeat TBD -PUSHA - Push Address Register On Stack +.. opcode:: PUSHA - Push Address Register On Stack push(src.x) push(src.y) push(src.z) push(src.w) - Considered for cleanup / removal. +.. note:: + + Considered for cleanup. + +.. note:: -POPA - Pop Address Register From Stack + Considered for removal. + +.. opcode:: POPA - Pop Address Register From Stack dst.w = pop() dst.z = pop() dst.y = pop() dst.x = pop() - Considered for cleanup / removal. +.. note:: + + Considered for cleanup. + +.. note:: + + Considered for removal. From GL_NV_gpu_program4 @@ -879,7 +883,7 @@ From GL_NV_gpu_program4 Support for these opcodes indicated by a special pipe capability bit (TBD). -CEIL - Ceiling +.. opcode:: CEIL - Ceiling .. math:: @@ -892,7 +896,7 @@ CEIL - Ceiling dst.w = \lceil src.w\rceil -I2F - Integer To Float +.. opcode:: I2F - Integer To Float .. math:: @@ -905,7 +909,7 @@ I2F - Integer To Float dst.w = (float) src.w -NOT - Bitwise Not +.. opcode:: NOT - Bitwise Not .. math:: @@ -918,7 +922,7 @@ NOT - Bitwise Not dst.w = ~src.w -TRUNC - Truncate +.. opcode:: TRUNC - Truncate .. math:: @@ -931,7 +935,7 @@ TRUNC - Truncate dst.w = trunc(src.w) -SHL - Shift Left +.. opcode:: SHL - Shift Left .. math:: @@ -944,7 +948,7 @@ SHL - Shift Left dst.w = src0.w << src1.x -SHR - Shift Right +.. opcode:: SHR - Shift Right .. math:: @@ -957,7 +961,7 @@ SHR - Shift Right dst.w = src0.w >> src1.x -AND - Bitwise And +.. opcode:: AND - Bitwise And .. 
math:: @@ -970,7 +974,7 @@ AND - Bitwise And dst.w = src0.w & src1.w -OR - Bitwise Or +.. opcode:: OR - Bitwise Or .. math:: @@ -983,7 +987,7 @@ OR - Bitwise Or dst.w = src0.w | src1.w -MOD - Modulus +.. opcode:: MOD - Modulus .. math:: @@ -996,7 +1000,7 @@ MOD - Modulus dst.w = src0.w \bmod src1.w -XOR - Bitwise Xor +.. opcode:: XOR - Bitwise Xor .. math:: @@ -1009,7 +1013,7 @@ XOR - Bitwise Xor dst.w = src0.w \oplus src1.w -SAD - Sum Of Absolute Differences +.. opcode:: SAD - Sum Of Absolute Differences .. math:: @@ -1022,17 +1026,17 @@ SAD - Sum Of Absolute Differences dst.w = |src0.w - src1.w| + src2.w -TXF - Texel Fetch +.. opcode:: TXF - Texel Fetch TBD -TXQ - Texture Size Query +.. opcode:: TXQ - Texture Size Query TBD -CONT - Continue +.. opcode:: CONT - Continue TBD @@ -1041,12 +1045,12 @@ From GL_NV_geometry_program4 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -EMIT - Emit +.. opcode:: EMIT - Emit TBD -ENDPRIM - End Primitive +.. opcode:: ENDPRIM - End Primitive TBD @@ -1055,66 +1059,64 @@ From GLSL ^^^^^^^^^^ -BGNLOOP - Begin a Loop +.. opcode:: BGNLOOP - Begin a Loop TBD -BGNSUB - Begin Subroutine +.. opcode:: BGNSUB - Begin Subroutine TBD -ENDLOOP - End a Loop +.. opcode:: ENDLOOP - End a Loop TBD -ENDSUB - End Subroutine +.. opcode:: ENDSUB - End Subroutine TBD -NOP - No Operation +.. opcode:: NOP - No Operation Do nothing. -NRM4 - 4-component Vector Normalise - -.. math:: +.. opcode:: NRM4 - 4-component Vector Normalise - dst.x = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} +This instruction replicates its result. - dst.y = \frac{src.y}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} - - dst.z = \frac{src.z}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} +.. 
math:: - dst.w = \frac{src.w}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + dst = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} ps_2_x ^^^^^^^^^^^^ -CALLNZ - Subroutine Call If Not Zero +.. opcode:: CALLNZ - Subroutine Call If Not Zero TBD -IFC - If +.. opcode:: IFC - If TBD -BREAKC - Break Conditional +.. opcode:: BREAKC - Break Conditional TBD +.. _doubleopcodes: + Double Opcodes ^^^^^^^^^^^^^^^ -DADD - Add Double +.. opcode:: DADD - Add Double .. math:: @@ -1123,7 +1125,7 @@ DADD - Add Double dst.zw = src0.zw + src1.zw -DDIV - Divide Double +.. opcode:: DDIV - Divide Double .. math:: @@ -1131,7 +1133,7 @@ DDIV - Divide Double dst.zw = src0.zw / src1.zw -DSEQ - Set Double on Equal +.. opcode:: DSEQ - Set Double on Equal .. math:: @@ -1139,7 +1141,7 @@ DSEQ - Set Double on Equal dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F -DSLT - Set Double on Less than +.. opcode:: DSLT - Set Double on Less than .. math:: @@ -1147,7 +1149,7 @@ DSLT - Set Double on Less than dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F -DFRAC - Double Fraction +.. opcode:: DFRAC - Double Fraction .. math:: @@ -1156,7 +1158,7 @@ DFRAC - Double Fraction dst.zw = src.zw - \lfloor src.zw\rfloor -DFRACEXP - Convert Double Number to Fractional and Integral Components +.. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components .. math:: @@ -1164,7 +1166,7 @@ DFRACEXP - Convert Double Number to Fractional and Integral Components dst0.zw = frexp(src.zw, dst1.zw) -DLDEXP - Multiple Double Number by Integral Power of 2 +.. opcode:: DLDEXP - Multiple Double Number by Integral Power of 2 .. math:: @@ -1172,7 +1174,7 @@ DLDEXP - Multiple Double Number by Integral Power of 2 dst.zw = ldexp(src0.zw, src1.zw) -DMIN - Minimum Double +.. opcode:: DMIN - Minimum Double .. math:: @@ -1180,7 +1182,7 @@ DMIN - Minimum Double dst.zw = min(src0.zw, src1.zw) -DMAX - Maximum Double +.. opcode:: DMAX - Maximum Double .. 
math:: @@ -1188,7 +1190,7 @@ DMAX - Maximum Double dst.zw = max(src0.zw, src1.zw) -DMUL - Multiply Double +.. opcode:: DMUL - Multiply Double .. math:: @@ -1197,7 +1199,7 @@ DMUL - Multiply Double dst.zw = src0.zw \times src1.zw -DMAD - Multiply And Add Doubles +.. opcode:: DMAD - Multiply And Add Doubles .. math:: @@ -1206,7 +1208,7 @@ DMAD - Multiply And Add Doubles dst.zw = src0.zw \times src1.zw + src2.zw -DRCP - Reciprocal Double +.. opcode:: DRCP - Reciprocal Double .. math:: @@ -1214,7 +1216,7 @@ DRCP - Reciprocal Double dst.zw = \frac{1}{src.zw} -DSQRT - Square root double +.. opcode:: DSQRT - Square root double .. math:: @@ -1269,20 +1271,8 @@ Keywords discard Discard fragment. - dst First destination register. - - dst0 First destination register. - pc Program counter. - src First source register. - - src0 First source register. - - src1 Second source register. - - src2 Third source register. - target Label of target instruction. @@ -1441,3 +1431,43 @@ GL_ARB_fragment_coord_conventions extension. DirectX 9 uses INTEGER. DirectX 10 uses HALF_INTEGER. + + + +Texture Sampling and Texture Formats +------------------------------------ + +This table shows how texture image components are returned as (x,y,z,w) tuples +by TGSI texture instructions, such as :opcode:`TEX`, :opcode:`TXD`, and +:opcode:`TXP`. For reference, OpenGL and Direct3D conventions are shown as +well. 
+ ++--------------------+--------------+--------------------+--------------+ +| Texture Components | Gallium | OpenGL | Direct3D 9 | ++====================+==============+====================+==============+ +| R | XXX TBD | (r, 0, 0, 1) | (r, 1, 1, 1) | ++--------------------+--------------+--------------------+--------------+ +| RG | XXX TBD | (r, g, 0, 1) | (r, g, 1, 1) | ++--------------------+--------------+--------------------+--------------+ +| RGB | (r, g, b, 1) | (r, g, b, 1) | (r, g, b, 1) | ++--------------------+--------------+--------------------+--------------+ +| RGBA | (r, g, b, a) | (r, g, b, a) | (r, g, b, a) | ++--------------------+--------------+--------------------+--------------+ +| A | (0, 0, 0, a) | (0, 0, 0, a) | (0, 0, 0, a) | ++--------------------+--------------+--------------------+--------------+ +| L | (l, l, l, 1) | (l, l, l, 1) | (l, l, l, 1) | ++--------------------+--------------+--------------------+--------------+ +| LA | (l, l, l, a) | (l, l, l, a) | (l, l, l, a) | ++--------------------+--------------+--------------------+--------------+ +| I | (i, i, i, i) | (i, i, i, i) | N/A | ++--------------------+--------------+--------------------+--------------+ +| UV | XXX TBD | (0, 0, 0, 1) | (u, v, 1, 1) | +| | | [#envmap-bumpmap]_ | | ++--------------------+--------------+--------------------+--------------+ +| Z | XXX TBD | (z, z, z, 1) | (0, z, 0, 1) | +| | | [#depth-tex-mode]_ | | ++--------------------+--------------+--------------------+--------------+ + +.. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt +.. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z) + or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE. 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 0155b9be501..353ae176fdb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -65,7 +65,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* compute vertex layout now */ const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_current_shader_outputs(llvmpipe->draw); + const uint num = draw_num_shader_outputs(llvmpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 60ea9c171d5..39bcdc8fe60 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -77,17 +77,21 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, static void find_output_registers(struct r300_fragment_program_compiler * compiler, struct r300_fragment_shader * fs) { - unsigned i; + unsigned i, colorbuf_count = 0; /* Mark the outputs as not present initially */ - compiler->OutputColor = fs->info.num_outputs; + compiler->OutputColor[0] = fs->info.num_outputs; + compiler->OutputColor[1] = fs->info.num_outputs; + compiler->OutputColor[2] = fs->info.num_outputs; + compiler->OutputColor[3] = fs->info.num_outputs; compiler->OutputDepth = fs->info.num_outputs; /* Now see where they really are. 
*/ for(i = 0; i < fs->info.num_outputs; ++i) { switch(fs->info.output_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: - compiler->OutputColor = i; + compiler->OutputColor[colorbuf_count] = i; + colorbuf_count++; break; case TGSI_SEMANTIC_POSITION: compiler->OutputDepth = i; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 453fb1accc2..b37be261337 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -37,24 +37,31 @@ unsigned r300_texture_get_stride(struct r300_screen* screen, unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, unsigned zslice, unsigned face); -/* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ +/* Translate a pipe_format into a useful texture format for sampling. + * + * R300_EASY_TX_FORMAT swizzles the texture. + * Note the signature of R300_EASY_TX_FORMAT: + * R300_EASY_TX_FORMAT(B, G, R, A, FORMAT); + * + * The FORMAT specifies how the texture sampler will treat the texture, and + * makes available X, Y, Z, W, ZERO, and ONE for swizzling. 
*/ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { switch (format) { /* X8 */ case PIPE_FORMAT_A8_UNORM: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8); case PIPE_FORMAT_I8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X8); case PIPE_FORMAT_L8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); /* X16 */ case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_Z16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); case PIPE_FORMAT_R16_SNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16) | R300_TX_FORMAT_SIGNED; - case PIPE_FORMAT_Z16_UNORM: - return R300_EASY_TX_FORMAT(X, X, X, X, X16); /* Y8X8 */ case PIPE_FORMAT_A8L8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index a792c2cf989..941ec17016b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -201,6 +201,8 @@ static void transform_srcreg( struct rc_src_register * dst, struct tgsi_full_src_register * src) { + unsigned i, j; + dst->File = translate_register_file(src->Register.File); dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); dst->RelAddr = src->Register.Indirect; @@ -210,6 +212,21 @@ static void transform_srcreg( dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; dst->Abs = src->Register.Absolute; dst->Negate = src->Register.Negate ? 
RC_MASK_XYZW : 0; + + if (src->Register.File == TGSI_FILE_IMMEDIATE) { + for (i = 0; i < ttr->imms_to_swizzle_count; i++) { + if (ttr->imms_to_swizzle[i].index == src->Register.Index) { + dst->File = RC_FILE_TEMPORARY; + dst->Index = 0; + dst->Swizzle = 0; + for (j = 0; j < 4; j++) { + dst->Swizzle |= GET_SWZ(ttr->imms_to_swizzle[i].swizzle, + tgsi_util_get_full_src_register_swizzle(src, j)) << (j * 3); + } + break; + } + } + } } static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src, @@ -277,21 +294,45 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst &ttr->compiler->Program.ShadowSamplers); } -static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) +static void handle_immediate(struct tgsi_to_rc * ttr, + struct tgsi_full_immediate * imm, + unsigned index) { struct rc_constant constant; - int i; + unsigned swizzle = 0; + boolean can_swizzle = TRUE; + unsigned i; - constant.Type = RC_CONSTANT_IMMEDIATE; - constant.Size = 4; - for(i = 0; i < 4; ++i) - constant.u.Immediate[i] = imm->u[i].Float; - rc_constants_add(&ttr->compiler->Program.Constants, &constant); + for (i = 0; i < 4; i++) { + if (imm->u[i].Float == 0.0f) { + swizzle |= RC_SWIZZLE_ZERO << (i * 3); + } else if (imm->u[i].Float == 0.5f) { + swizzle |= RC_SWIZZLE_HALF << (i * 3); + } else if (imm->u[i].Float == 1.0f) { + swizzle |= RC_SWIZZLE_ONE << (i * 3); + } else { + can_swizzle = FALSE; + break; + } + } + + if (can_swizzle) { + ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].index = index; + ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].swizzle = swizzle; + ttr->imms_to_swizzle_count++; + } else { + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + for(i = 0; i < 4; ++i) + constant.u.Immediate[i] = imm->u[i].Float; + rc_constants_add(&ttr->compiler->Program.Constants, &constant); + } } void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) { struct 
tgsi_parse_context parser; + unsigned imm_index = 0; int i; /* Allocate constants placeholders. @@ -308,6 +349,9 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) ttr->immediate_offset = ttr->compiler->Program.Constants.Count; + ttr->imms_to_swizzle = malloc(ttr->info->immediate_count * sizeof(struct swizzled_imms)); + ttr->imms_to_swizzle_count = 0; + tgsi_parse_init(&parser, tokens); while (!tgsi_parse_end_of_tokens(&parser)) { @@ -317,7 +361,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) case TGSI_TOKEN_TYPE_DECLARATION: break; case TGSI_TOKEN_TYPE_IMMEDIATE: - handle_immediate(ttr, &parser.FullToken.FullImmediate); + handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index); + imm_index++; break; case TGSI_TOKEN_TYPE_INSTRUCTION: transform_instruction(ttr, &parser.FullToken.FullInstruction); @@ -327,6 +372,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) tgsi_parse_free(&parser); + free(ttr->imms_to_swizzle); + rc_calculate_inputs_outputs(ttr->compiler); } diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h index 93e90ec6d2c..39b473c7bf5 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -29,11 +29,18 @@ struct tgsi_full_declaration; struct tgsi_shader_info; struct tgsi_token; +struct swizzled_imms { + unsigned index; + unsigned swizzle; +}; + struct tgsi_to_rc { struct radeon_compiler * compiler; const struct tgsi_shader_info * info; int immediate_offset; + struct swizzled_imms * imms_to_swizzle; + unsigned imms_to_swizzle_count; }; void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f6856a5f691..d2eda7324ca 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ 
b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -30,7 +30,6 @@ #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" -#include "draw/draw_private.h" #include "sp_context.h" #include "sp_screen.h" #include "sp_state.h" @@ -67,7 +66,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_current_shader_outputs(softpipe->draw); + const uint num = draw_num_shader_outputs(softpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index e88ef8d8fe3..c4181c3f5b7 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -215,7 +215,6 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->state.hw_draw.num_views = 0; svga->dirty = ~0; - svga->state.white_fs_id = SVGA3D_INVALID_ID; LIST_INITHEAD(&svga->dirty_buffers); diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 66259fd0103..ba86256eb26 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -327,10 +327,6 @@ struct svga_context unsigned texture_timestamp; - /* Internally generated shaders: - */ - unsigned white_fs_id; - /* */ struct svga_sw_state sw; diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index adc7120217b..2973444d0ab 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -108,70 +108,6 @@ fail: return ret; } -/* The blend workaround for simulating logicop xor behaviour requires - * that the incoming fragment color be white. 
This change achieves - * that by hooking up a hard-wired fragment shader that just emits - * color 1,1,1,1 - * - * This is a slightly incomplete solution as it assumes that the - * actual bound shader has no other effects beyond generating a - * fragment color. In particular shaders containing TEXKIL and/or - * depth-write will not have the correct behaviour, nor will those - * expecting to use alphatest. - * - * These are avoidable issues, but they are not much worse than the - * unavoidable ones associated with this technique, so it's not clear - * how much effort should be expended trying to resolve them - the - * ultimate result will still not be correct in most cases. - * - * Shader below was generated with: - * SVGA_DEBUG=tgsi ./mesa/progs/fp/fp-tri white.txt - */ -static int emit_white_fs( struct svga_context *svga ) -{ - int ret = PIPE_ERROR; - - /* ps_3_0 - * def c0, 1.000000, 0.000000, 0.000000, 1.000000 - * mov oC0, c0.x - * end - */ - static const unsigned white_tokens[] = { - 0xffff0300, - 0x05000051, - 0xa00f0000, - 0x3f800000, - 0x00000000, - 0x00000000, - 0x3f800000, - 0x02000001, - 0x800f0800, - 0xa0000000, - 0x0000ffff, - }; - - assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX); - svga->state.white_fs_id = util_bitmask_add(svga->fs_bm); - if(svga->state.white_fs_id == SVGA3D_INVALID_ID) - goto no_fs_id; - - ret = SVGA3D_DefineShader(svga->swc, - svga->state.white_fs_id, - SVGA3D_SHADERTYPE_PS, - white_tokens, - sizeof(white_tokens)); - if (ret) - goto no_definition; - - return 0; - -no_definition: - util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id); - svga->state.white_fs_id = SVGA3D_INVALID_ID; -no_fs_id: - return ret; -} - /* SVGA_NEW_TEXTURE_BINDING * SVGA_NEW_RAST @@ -199,6 +135,23 @@ static int make_fs_key( const struct svga_context *svga, PIPE_WINDING_CW); } + /* The blend workaround for simulating logicop xor behaviour + * requires that the incoming fragment color be white. 
This change + * achieves that by creating a variant of the current fragment + * shader that overrides all output colors with 1,1,1,1 + * + * This will work for most shaders, including those containing + * TEXKIL and/or depth-write. However, it will break on the + * combination of xor-logicop plus alphatest. + * + * Ultimately, we could implement alphatest in the shader using + * texkil prior to overriding the outgoing fragment color. + * + * SVGA_NEW_BLEND + */ + if (svga->curr.blend->need_white_fragments) { + key->white_fragments = 1; + } /* XXX: want to limit this to the textures that the shader actually * refers to. @@ -238,40 +191,29 @@ static int emit_hw_fs( struct svga_context *svga, unsigned id = SVGA3D_INVALID_ID; int ret = 0; + struct svga_fragment_shader *fs = svga->curr.fs; + struct svga_fs_compile_key key; + /* SVGA_NEW_BLEND + * SVGA_NEW_TEXTURE_BINDING + * SVGA_NEW_RAST + * SVGA_NEW_NEED_SWTNL + * SVGA_NEW_SAMPLER */ - if (svga->curr.blend->need_white_fragments) { - if (svga->state.white_fs_id == SVGA3D_INVALID_ID) { - ret = emit_white_fs( svga ); - if (ret) - return ret; - } - id = svga->state.white_fs_id; - } - else { - struct svga_fragment_shader *fs = svga->curr.fs; - struct svga_fs_compile_key key; - - /* SVGA_NEW_TEXTURE_BINDING - * SVGA_NEW_RAST - * SVGA_NEW_NEED_SWTNL - * SVGA_NEW_SAMPLER - */ - ret = make_fs_key( svga, &key ); + ret = make_fs_key( svga, &key ); + if (ret) + return ret; + + result = search_fs_key( fs, &key ); + if (!result) { + ret = compile_fs( svga, fs, &key, &result ); if (ret) return ret; - - result = search_fs_key( fs, &key ); - if (!result) { - ret = compile_fs( svga, fs, &key, &result ); - if (ret) - return ret; - } - - assert (result); - id = result->id; } + assert (result); + id = result->id; + assert(id != SVGA3D_INVALID_ID); if (result != svga->state.hw_draw.fs) { diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index 737a2213af5..063c9cf4221 100644 ---
a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -49,6 +49,7 @@ struct svga_fs_compile_key { unsigned light_twoside:1; unsigned front_cw:1; + unsigned white_fragments:1; unsigned num_textures:8; unsigned num_unnormalized_coords:8; struct { diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index 43fc0d32359..73102a72a83 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -194,8 +194,19 @@ static boolean ps30_output( struct svga_shader_emitter *emit, switch (semantic.Name) { case TGSI_SEMANTIC_COLOR: - emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, - semantic.Index ); + if (emit->unit == PIPE_SHADER_FRAGMENT && + emit->key.fkey.white_fragments) { + + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_col[idx] = emit->output_map[idx]; + emit->true_col[idx] = dst_register( SVGA3DREG_COLOROUT, + semantic.Index ); + } + else { + emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, + semantic.Index ); + } break; case TGSI_SEMANTIC_POSITION: emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index 2557824293e..e8f75485d55 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -79,6 +79,8 @@ struct svga_shader_emitter int ps30_input_count; + int dynamic_branching_level; + boolean in_main_func; boolean created_zero_immediate; @@ -199,6 +201,23 @@ static INLINE boolean emit_op3( struct svga_shader_emitter *emit, } +static INLINE boolean emit_op4( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2, + struct src_register src3) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, 
dest ) && + emit_src( emit, src0 ) && + emit_src( emit, src1 ) && + emit_src( emit, src2 ) && + emit_src( emit, src3 )); +} + + #define TRANSLATE_SWIZZLE(x,y,z,w) ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6)) #define SWIZZLE_XYZW \ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_W) diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index dc5eb8fc606..be821e98217 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -46,8 +46,6 @@ translate_opcode( case TGSI_OPCODE_ABS: return SVGA3DOP_ABS; case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC; - case TGSI_OPCODE_DDX: return SVGA3DOP_DSX; - case TGSI_OPCODE_DDY: return SVGA3DOP_DSY; case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; @@ -415,6 +413,88 @@ static boolean submit_op3( struct svga_shader_emitter *emit, } + + +/* SVGA shaders may not refer to >1 constant register in a single + * instruction. This function checks for that usage and inserts a + * move to temporary if detected. 
+ */ +static boolean submit_op4( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2, + struct src_register src3) +{ + SVGA3dShaderDestToken temp0; + SVGA3dShaderDestToken temp3; + boolean need_temp0 = FALSE; + boolean need_temp3 = FALSE; + SVGA3dShaderRegType type0, type1, type2, type3; + + temp0.value = 0; + temp3.value = 0; + type0 = SVGA3dShaderGetRegType( src0.base.value ); + type1 = SVGA3dShaderGetRegType( src1.base.value ); + type2 = SVGA3dShaderGetRegType( src2.base.value ); + type3 = SVGA3dShaderGetRegType( src2.base.value ); + + /* Make life a little easier - this is only used by the TXD + * instruction which is guaranteed not to have a constant/input reg + * in one slot at least: + */ + assert(type1 == SVGA3DREG_SAMPLER); + + if (type0 == SVGA3DREG_CONST && + ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) || + (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type3 == SVGA3DREG_CONST && + (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num)) + need_temp3 = TRUE; + + if (type0 == SVGA3DREG_INPUT && + ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) || + (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type3 == SVGA3DREG_INPUT && + (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) + need_temp3 = TRUE; + + if (need_temp0) + { + temp0 = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + return FALSE; + + src0 = src( temp0 ); + } + + if (need_temp3) + { + temp3 = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 )) + return FALSE; + + src3 = src( temp3 ); + } + + if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 )) + return FALSE; + + if (need_temp3) + release_temp( emit, temp3 ); + if (need_temp0) + release_temp( emit, temp0 ); + 
return TRUE; +} + + static boolean emit_def_const( struct svga_shader_emitter *emit, SVGA3dShaderConstType type, unsigned idx, @@ -660,6 +740,8 @@ static boolean emit_if(struct svga_shader_emitter *emit, if_token.control = SVGA3DOPCOMPC_NE; zero = scalar(zero, TGSI_SWIZZLE_X); + emit->dynamic_branching_level++; + return (emit_instruction( emit, if_token ) && emit_src( emit, src ) && emit_src( emit, zero ) ); @@ -668,6 +750,8 @@ static boolean emit_if(struct svga_shader_emitter *emit, static boolean emit_endif(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { + emit->dynamic_branching_level--; + return (emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ))); } @@ -1011,10 +1095,10 @@ static boolean emit_kilp(struct svga_shader_emitter *emit, { SVGA3dShaderInstToken inst; SVGA3dShaderDestToken temp; - struct src_register one = get_zero_immediate( emit ); + struct src_register one = scalar( get_zero_immediate( emit ), + TGSI_SWIZZLE_W ); inst = inst_token( SVGA3DOP_TEXKILL ); - one = scalar( one, TGSI_SWIZZLE_W ); /* texkill doesn't allow negation on the operand so lets move * negation of {1} to a temp register */ @@ -1169,41 +1253,79 @@ static boolean emit_tex2(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst ) { SVGA3dShaderInstToken inst; - struct src_register src0; - struct src_register src1; - + struct src_register texcoord; + struct src_register sampler; + SVGA3dShaderDestToken tmp; + inst.value = 0; - inst.op = SVGA3DOP_TEX; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_TEX: + inst.op = SVGA3DOP_TEX; break; case TGSI_OPCODE_TXP: + inst.op = SVGA3DOP_TEX; inst.control = SVGA3DOPCONT_PROJECT; break; case TGSI_OPCODE_TXB: + inst.op = SVGA3DOP_TEX; inst.control = SVGA3DOPCONT_BIAS; break; + case TGSI_OPCODE_TXL: + inst.op = SVGA3DOP_TEXLDL; + break; default: assert(0); return FALSE; } - src0 = translate_src_register( emit, &insn->Src[0] ); - src1 = translate_src_register( emit, &insn->Src[1] ); + texcoord = 
translate_src_register( emit, &insn->Src[0] ); + sampler = translate_src_register( emit, &insn->Src[1] ); - if (emit->key.fkey.tex[src1.base.num].unnormalized) { - struct src_register wh = get_tex_dimensions( emit, src1.base.num ); - SVGA3dShaderDestToken tmp = get_temp( emit ); + if (emit->key.fkey.tex[sampler.base.num].unnormalized || + emit->dynamic_branching_level > 0) + tmp = get_temp( emit ); + + /* Can't do mipmapping inside dynamic branch constructs. Force LOD + * zero in that case. + */ + if (emit->dynamic_branching_level > 0 && + inst.op == SVGA3DOP_TEX && + SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) { + struct src_register zero = get_zero_immediate( emit ); + + /* MOV tmp, texcoord */ + if (!submit_op1( emit, + inst_token( SVGA3DOP_MOV ), + tmp, + texcoord )) + return FALSE; + + /* MOV tmp.w, zero */ + if (!submit_op1( emit, + inst_token( SVGA3DOP_MOV ), + writemask( tmp, TGSI_WRITEMASK_W ), + scalar( zero, TGSI_SWIZZLE_X ))) + return FALSE; + + texcoord = src( tmp ); + inst.op = SVGA3DOP_TEXLDL; + } + + /* Explicit normalization of texcoords: + */ + if (emit->key.fkey.tex[sampler.base.num].unnormalized) { + struct src_register wh = get_tex_dimensions( emit, sampler.base.num ); /* MUL tmp, SRC0, WH */ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), - tmp, src0, wh )) + tmp, texcoord, wh )) return FALSE; - src0 = src( tmp ); + + texcoord = src( tmp ); } - return submit_op2( emit, inst, dst, src0, src1 ); + return submit_op2( emit, inst, dst, texcoord, sampler ); } @@ -1211,31 +1333,33 @@ static boolean emit_tex2(struct svga_shader_emitter *emit, /* Translate texture instructions to SVGA3D representation. 
*/ -static boolean emit_tex3(struct svga_shader_emitter *emit, +static boolean emit_tex4(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn, SVGA3dShaderDestToken dst ) { SVGA3dShaderInstToken inst; - struct src_register src0; - struct src_register src1; - struct src_register src2; + struct src_register texcoord; + struct src_register ddx; + struct src_register ddy; + struct src_register sampler; + + texcoord = translate_src_register( emit, &insn->Src[0] ); + ddx = translate_src_register( emit, &insn->Src[1] ); + ddy = translate_src_register( emit, &insn->Src[2] ); + sampler = translate_src_register( emit, &insn->Src[3] ); inst.value = 0; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_TXD: - inst.op = SVGA3DOP_TEXLDD; - break; - case TGSI_OPCODE_TXL: - inst.op = SVGA3DOP_TEXLDL; + inst.op = SVGA3DOP_TEXLDD; /* 4 args! */ break; + default: + assert(0); + return FALSE; } - src0 = translate_src_register( emit, &insn->Src[0] ); - src1 = translate_src_register( emit, &insn->Src[1] ); - src2 = translate_src_register( emit, &insn->Src[2] ); - - return submit_op3( emit, inst, dst, src0, src1, src2 ); + return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy ); } @@ -1271,12 +1395,12 @@ static boolean emit_tex(struct svga_shader_emitter *emit, case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXL: if (!emit_tex2( emit, insn, tex_result )) return FALSE; break; - case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXD: - if (!emit_tex3( emit, insn, tex_result )) + if (!emit_tex4( emit, insn, tex_result )) return FALSE; break; default: @@ -1330,6 +1454,8 @@ static boolean emit_bgnloop2( struct svga_shader_emitter *emit, struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 ); struct src_register const_int = get_loop_const( emit ); + emit->dynamic_branching_level++; + return (emit_instruction( emit, inst ) && emit_src( emit, loop_reg ) && emit_src( emit, const_int ) ); @@ -1339,6 +1465,9 @@ static boolean 
emit_endloop2( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn ) { SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP ); + + emit->dynamic_branching_level--; + return emit_instruction( emit, inst ); } @@ -1398,6 +1527,46 @@ static boolean emit_simple_instruction(struct svga_shader_emitter *emit, } } + +static boolean emit_deriv(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + if (emit->dynamic_branching_level > 0 && + insn->Src[0].Register.File == TGSI_FILE_TEMPORARY) + { + struct src_register zero = get_zero_immediate( emit ); + SVGA3dShaderDestToken dst = + translate_dst_register( emit, insn, 0 ); + + /* Deriv opcodes not valid inside dynamic branching, workaround + * by zeroing out the destination. + */ + if (!submit_op1(emit, + inst_token( SVGA3DOP_MOV ), + dst, + scalar(zero, TGSI_SWIZZLE_X))) + return FALSE; + + return TRUE; + } + else { + unsigned opcode; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_DDX: + opcode = SVGA3DOP_DSX; + break; + case TGSI_OPCODE_DDY: + opcode = SVGA3DOP_DSY; + break; + default: + return FALSE; + } + + return emit_simple_instruction( emit, opcode, insn ); + } +} + static boolean emit_arl(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { @@ -2002,6 +2171,10 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_TXD: return emit_tex( emit, insn ); + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + return emit_deriv( emit, insn ); + case TGSI_OPCODE_BGNSUB: return emit_bgnsub( emit, position, insn ); @@ -2254,11 +2427,28 @@ static boolean emit_ps_postamble( struct svga_shader_emitter *emit ) for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) { - if (!submit_op1( emit, - inst_token(SVGA3DOP_MOV), - emit->true_col[i], - src(emit->temp_col[i]) )) - return FALSE; + /* Potentially override output colors with white for XOR + * logicop workaround. 
+ */ + if (emit->unit == PIPE_SHADER_FRAGMENT && + emit->key.fkey.white_fragments) { + + struct src_register one = scalar( get_zero_immediate( emit ), + TGSI_SWIZZLE_W ); + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_col[i], + one )) + return FALSE; + } + else { + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_col[i], + src(emit->temp_col[i]) )) + return FALSE; + } } } @@ -2467,6 +2657,9 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) if (emit->key.fkey.light_twoside) return TRUE; + if (emit->key.fkey.white_fragments) + return TRUE; + if (emit->emit_frontface) return TRUE; @@ -2476,6 +2669,10 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) } if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_BGNFOR] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || @@ -2702,6 +2899,8 @@ boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit, goto done; } + assert(emit->dynamic_branching_level == 0); + /* Need to terminate the whole shader: */ ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) ); diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 8a73e81d4ac..30e2c347bd4 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -1087,8 +1087,8 @@ egl_g3d_bind_tex_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, EGLint buffer) { struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); - _EGLContext *ctx = _eglGetAPIContext(EGL_OPENGL_ES_API); - struct egl_g3d_context *gctx = egl_g3d_context(ctx); + _EGLContext *es1 = _eglGetAPIContext(EGL_OPENGL_ES_API); + struct 
egl_g3d_context *gctx; enum pipe_format target_format; int target; @@ -1118,6 +1118,11 @@ egl_g3d_bind_tex_image(_EGLDriver *drv, _EGLDisplay *dpy, return _eglError(EGL_BAD_MATCH, "eglBindTexImage"); } + if (!es1) + return EGL_TRUE; + if (!gsurf->render_surface) + return EGL_FALSE; + /* flush properly if the surface is bound */ if (gsurf->base.CurrentContext) { gctx = egl_g3d_context(gsurf->base.CurrentContext); @@ -1125,14 +1130,11 @@ egl_g3d_bind_tex_image(_EGLDriver *drv, _EGLDisplay *dpy, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); } - if (gctx) { - if (!gsurf->render_surface) - return EGL_FALSE; + gctx = egl_g3d_context(es1); + gctx->stapi->st_bind_texture_surface(gsurf->render_surface, + target, gsurf->base.MipmapLevel, target_format); - gctx->stapi->st_bind_texture_surface(gsurf->render_surface, - target, gsurf->base.MipmapLevel, target_format); - gsurf->base.BoundToTexture = EGL_TRUE; - } + gsurf->base.BoundToTexture = EGL_TRUE; return EGL_TRUE; } diff --git a/src/gallium/state_trackers/vega/asm_fill.h b/src/gallium/state_trackers/vega/asm_fill.h index 2f394ad6c55..27773467fa8 100644 --- a/src/gallium/state_trackers/vega/asm_fill.h +++ b/src/gallium/state_trackers/vega/asm_fill.h @@ -27,166 +27,375 @@ #ifndef ASM_FILL_H #define ASM_FILL_H -static const char solid_fill_asm[] = - "MOV %s, CONST[0]\n"; - - -static const char linear_grad_asm[] = - "MOV TEMP[0].xy, IN[0]\n" - "MOV TEMP[0].z, CONST[1].yyyy\n" - "DP3 TEMP[1], CONST[2], TEMP[0]\n" - "DP3 TEMP[2], CONST[3], TEMP[0]\n" - "DP3 TEMP[3], CONST[4], TEMP[0]\n" - "RCP TEMP[3], TEMP[3]\n" - "MUL TEMP[1], TEMP[1], TEMP[3]\n" - "MUL TEMP[2], TEMP[2], TEMP[3]\n" - "MOV TEMP[4].x, TEMP[1]\n" - "MOV TEMP[4].y, TEMP[2]\n" - "MUL TEMP[0], CONST[0].yyyy, TEMP[4].yyyy\n" - "MAD TEMP[1], CONST[0].xxxx, TEMP[4].xxxx, TEMP[0]\n" - "MUL TEMP[2], TEMP[1], CONST[0].zzzz\n" - "TEX %s, TEMP[2], SAMP[0], 1D\n"; - -static const char radial_grad_asm[] = - "MOV TEMP[0].xy, IN[0]\n" - "MOV TEMP[0].z, CONST[1].yyyy\n" - 
"DP3 TEMP[1], CONST[2], TEMP[0]\n" - "DP3 TEMP[2], CONST[3], TEMP[0]\n" - "DP3 TEMP[3], CONST[4], TEMP[0]\n" - "RCP TEMP[3], TEMP[3]\n" - "MUL TEMP[1], TEMP[1], TEMP[3]\n" - "MUL TEMP[2], TEMP[2], TEMP[3]\n" - "MOV TEMP[5].x, TEMP[1]\n" - "MOV TEMP[5].y, TEMP[2]\n" - "MUL TEMP[0], CONST[0].yyyy, TEMP[5].yyyy\n" - "MAD TEMP[1], CONST[0].xxxx, TEMP[5].xxxx, TEMP[0]\n" - "ADD TEMP[1], TEMP[1], TEMP[1]\n" - "MUL TEMP[3], TEMP[5].yyyy, TEMP[5].yyyy\n" - "MAD TEMP[4], TEMP[5].xxxx, TEMP[5].xxxx, TEMP[3]\n" - "MOV TEMP[4], -TEMP[4]\n" - "MUL TEMP[2], CONST[0].zzzz, TEMP[4]\n" - "MUL TEMP[0], CONST[1].wwww, TEMP[2]\n" - "MUL TEMP[3], TEMP[1], TEMP[1]\n" - "SUB TEMP[2], TEMP[3], TEMP[0]\n" - "RSQ TEMP[2], |TEMP[2]|\n" - "RCP TEMP[2], TEMP[2]\n" - "SUB TEMP[1], TEMP[2], TEMP[1]\n" - "ADD TEMP[0], CONST[0].zzzz, CONST[0].zzzz\n" - "RCP TEMP[0], TEMP[0]\n" - "MUL TEMP[2], TEMP[1], TEMP[0]\n" - "TEX %s, TEMP[2], SAMP[0], 1D\n"; - -static const char pattern_asm[] = - "MOV TEMP[0].xy, IN[0]\n" - "MOV TEMP[0].z, CONST[1].yyyy\n" - "DP3 TEMP[1], CONST[2], TEMP[0]\n" - "DP3 TEMP[2], CONST[3], TEMP[0]\n" - "DP3 TEMP[3], CONST[4], TEMP[0]\n" - "RCP TEMP[3], TEMP[3]\n" - "MUL TEMP[1], TEMP[1], TEMP[3]\n" - "MUL TEMP[2], TEMP[2], TEMP[3]\n" - "MOV TEMP[4].x, TEMP[1]\n" - "MOV TEMP[4].y, TEMP[2]\n" - "RCP TEMP[0], CONST[1].zwzw\n" - "MOV TEMP[1], TEMP[4]\n" - "MUL TEMP[1].x, TEMP[1], TEMP[0]\n" - "MUL TEMP[1].y, TEMP[1], TEMP[0]\n" - "TEX %s, TEMP[1], SAMP[0], 2D\n"; - - -static const char mask_asm[] = - "TEX TEMP[1], IN[0], SAMP[1], 2D\n" - "MUL TEMP[0].w, TEMP[0].wwww, TEMP[1].wwww\n" - "MOV %s, TEMP[0]\n"; - - -static const char image_normal_asm[] = - "TEX %s, IN[1], SAMP[3], 2D\n"; - -static const char image_multiply_asm[] = - "TEX TEMP[1], IN[1], SAMP[3], 2D\n" - "MUL %s, TEMP[0], TEMP[1]\n"; - -static const char image_stencil_asm[] = - "TEX TEMP[1], IN[1], SAMP[3], 2D\n" - "MUL %s, TEMP[0], TEMP[1]\n"; - - -#define EXTENDED_BLEND_OVER \ - "SUB TEMP[3], CONST[1].yyyy, 
TEMP[1].wwww\n" \ - "SUB TEMP[4], CONST[1].yyyy, TEMP[0].wwww\n" \ - "MUL TEMP[3], TEMP[0], TEMP[3]\n" \ - "MUL TEMP[4], TEMP[1], TEMP[4]\n" \ - "ADD TEMP[3], TEMP[3], TEMP[4]\n" - -static const char blend_multiply_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - EXTENDED_BLEND_OVER - "MUL TEMP[4], TEMP[0], TEMP[1]\n" - "ADD TEMP[1], TEMP[4], TEMP[3]\n"/*result.rgb*/ - "MUL TEMP[2], TEMP[0].wwww, TEMP[1].wwww\n" - "ADD TEMP[3], TEMP[0].wwww, TEMP[1].wwww\n" - "SUB TEMP[1].w, TEMP[3], TEMP[2]\n" - "MOV %s, TEMP[1]\n"; -#if 1 -static const char blend_screen_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - "ADD TEMP[3], TEMP[0], TEMP[1]\n" - "MUL TEMP[2], TEMP[0], TEMP[1]\n" - "SUB %s, TEMP[3], TEMP[2]\n"; -#else -static const char blend_screen_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - "MOV %s, TEMP[1]\n"; -#endif - -static const char blend_darken_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - EXTENDED_BLEND_OVER - "MUL TEMP[4], TEMP[0], TEMP[1].wwww\n" - "MUL TEMP[5], TEMP[1], TEMP[0].wwww\n" - "MIN TEMP[4], TEMP[4], TEMP[5]\n" - "ADD TEMP[1], TEMP[3], TEMP[4]\n" - "MUL TEMP[2], TEMP[0].wwww, TEMP[1].wwww\n" - "ADD TEMP[3], TEMP[0].wwww, TEMP[1].wwww\n" - "SUB TEMP[1].w, TEMP[3], TEMP[2]\n" - "MOV %s, TEMP[1]\n"; - -static const char blend_lighten_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - EXTENDED_BLEND_OVER - "MUL TEMP[4], TEMP[0], TEMP[1].wwww\n" - "MUL TEMP[5], TEMP[1], TEMP[0].wwww\n" - "MAX TEMP[4], TEMP[4], TEMP[5]\n" - "ADD TEMP[1], TEMP[3], TEMP[4]\n" - "MUL TEMP[2], TEMP[0].wwww, TEMP[1].wwww\n" - "ADD TEMP[3], TEMP[0].wwww, TEMP[1].wwww\n" - "SUB TEMP[1].w, TEMP[3], TEMP[2]\n" - "MOV %s, TEMP[1]\n"; - - -static const char premultiply_asm[] = - "MUL TEMP[0].xyz, TEMP[0], TEMP[0].wwww\n"; - -static const char unpremultiply_asm[] = - "TEX TEMP[0], IN[0], SAMP[1], 2D\n"; - - -static const char color_bw_asm[] = - "ADD TEMP[1], CONST[1].yyyy, CONST[1].yyyy\n" - "RCP TEMP[2], TEMP[1]\n" - "ADD TEMP[1], CONST[1].yyyy, TEMP[2]\n" - "ADD TEMP[2].x, 
TEMP[0].xxxx, TEMP[0].yyyy\n" - "ADD TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx\n" - "SGE TEMP[0].xyz, TEMP[2].xxxx, TEMP[1]\n" - "SGE TEMP[0].w, TEMP[0].wwww, TEMP[2].yyyy\n" - "MOV %s, TEMP[0]\n"; +#include "tgsi/tgsi_ureg.h" + +typedef void (* ureg_func)( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant); + +static INLINE void +solid_fill( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_MOV(ureg, *out, constant[0]); +} + +static INLINE void +linear_grad( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_XY), + in[0]); + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_Z), + ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); + ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0])); + ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0])); + ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0])); + ureg_RCP(ureg, temp[3], ureg_src(temp[3])); + ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); + ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); + ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_X), ureg_src(temp[1])); + ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_Y), ureg_src(temp[2])); + ureg_MUL(ureg, temp[0], + ureg_scalar(constant[0], TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_Y)); + ureg_MAD(ureg, temp[1], + ureg_scalar(constant[0], TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_X), + ureg_src(temp[0])); + ureg_MUL(ureg, temp[2], ureg_src(temp[1]), + ureg_scalar(constant[0], TGSI_SWIZZLE_Z)); + ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]); +} + +static INLINE void +radial_grad( 
struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + + ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_XY), in[0]); + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_Z), + ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); + ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0])); + ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0])); + ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0])); + ureg_RCP(ureg, temp[3], ureg_src(temp[3])); + ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); + ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); + ureg_MOV(ureg, ureg_writemask(temp[5], TGSI_WRITEMASK_X), ureg_src(temp[1])); + ureg_MOV(ureg, ureg_writemask(temp[5], TGSI_WRITEMASK_Y), ureg_src(temp[2])); + ureg_MUL(ureg, temp[0], ureg_scalar(constant[0], TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y)); + ureg_MAD(ureg, temp[1], + ureg_scalar(constant[0], TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), ureg_src(temp[0])); + ureg_ADD(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[1])); + ureg_MUL(ureg, temp[3], + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y)); + ureg_MAD(ureg, temp[4], + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), + ureg_src(temp[3])); + ureg_MOV(ureg, temp[4], ureg_negate(ureg_src(temp[4]))); + ureg_MUL(ureg, temp[2], + ureg_scalar(constant[0], TGSI_SWIZZLE_Z), + ureg_src(temp[4])); + ureg_MUL(ureg, temp[0], + ureg_scalar(constant[1], TGSI_SWIZZLE_W), + ureg_src(temp[2])); + ureg_MUL(ureg, temp[3], ureg_src(temp[1]), ureg_src(temp[1])); + + ureg_SUB(ureg, temp[2], ureg_src(temp[3]), ureg_src(temp[0])); + ureg_RSQ(ureg, temp[2], ureg_abs(ureg_src(temp[2]))); + ureg_RCP(ureg, temp[2], ureg_src(temp[2])); + ureg_SUB(ureg, temp[1], ureg_src(temp[2]), ureg_src(temp[1])); + 
ureg_ADD(ureg, temp[0], + ureg_scalar(constant[0], TGSI_SWIZZLE_Z), + ureg_scalar(constant[0], TGSI_SWIZZLE_Z)); + ureg_RCP(ureg, temp[0], ureg_src(temp[0])); + ureg_MUL(ureg, temp[2], ureg_src(temp[1]), ureg_src(temp[0])); + ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]); + +} + + +static INLINE void +pattern( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_XY), + in[0]); + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_Z), + ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); + ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0])); + ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0])); + ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0])); + ureg_RCP(ureg, temp[3], ureg_src(temp[3])); + ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); + ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); + ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_X), ureg_src(temp[1])); + ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_Y), ureg_src(temp[2])); + ureg_RCP(ureg, temp[0], + ureg_swizzle(constant[1], + TGSI_SWIZZLE_Z, + TGSI_SWIZZLE_W, + TGSI_SWIZZLE_Z, + TGSI_SWIZZLE_W)); + ureg_MOV(ureg, temp[1], ureg_src(temp[4])); + ureg_MUL(ureg, + ureg_writemask(temp[1], TGSI_WRITEMASK_X), + ureg_src(temp[1]), + ureg_src(temp[0])); + ureg_MUL(ureg, + ureg_writemask(temp[1], TGSI_WRITEMASK_Y), + ureg_src(temp[1]), + ureg_src(temp[0])); + ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, ureg_src(temp[1]), sampler[0]); +} + +static INLINE void +mask( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[1]); + ureg_MUL(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_W), + ureg_scalar(ureg_src(temp[0]), 
TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_MOV(ureg, *out, ureg_src(temp[0])); +} + +static INLINE void +image_normal( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, in[1], sampler[3]); +} + + +static INLINE void +image_multiply( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); + ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); +} + + +static INLINE void +image_stencil( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); + ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); +} + +#define EXTENDED_BLENDER_OVER_FUNC \ + ureg_SUB(ureg, temp[3], \ + ureg_scalar(constant[1], TGSI_SWIZZLE_Y), \ + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); \ + ureg_SUB(ureg, temp[4], /* old asm: "SUB TEMP[4], ..."; writing temp[3] here clobbered the first SUB and left temp[4] unset */ \ + ureg_scalar(constant[1], TGSI_SWIZZLE_Y), \ + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); \ + ureg_MUL(ureg, temp[3], ureg_src(temp[0]), ureg_src(temp[3])); \ + ureg_MUL(ureg, temp[4], ureg_src(temp[1]), ureg_src(temp[4])); \ + ureg_ADD(ureg, temp[3], ureg_src(temp[3]), ureg_src(temp[4])); + + +static INLINE void +blend_multiply( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); + EXTENDED_BLENDER_OVER_FUNC + ureg_MUL(ureg, temp[4], ureg_src(temp[0]), ureg_src(temp[1])); + ureg_ADD(ureg, temp[1], ureg_src(temp[4]), ureg_src(temp[3])); + + ureg_MUL(ureg, temp[2], 
ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W), + ureg_src(temp[3]), ureg_src(temp[2])); + + ureg_MOV(ureg, *out, ureg_src(temp[1])); +} + +static INLINE void +blend_screen( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); + ureg_ADD(ureg, temp[3], ureg_src(temp[0]), ureg_src(temp[1])); + ureg_MUL(ureg, temp[2], ureg_src(temp[0]), ureg_src(temp[1])); + ureg_SUB(ureg, *out, ureg_src(temp[3]), ureg_src(temp[2])); +} + +static INLINE void +blend_darken( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); + EXTENDED_BLENDER_OVER_FUNC + ureg_MUL(ureg, temp[4], ureg_src(temp[0]), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_MUL(ureg, temp[5], ureg_src(temp[1]), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); + ureg_MIN(ureg, temp[4], ureg_src(temp[4]), ureg_src(temp[5])); + ureg_ADD(ureg, temp[1], ureg_src(temp[3]), ureg_src(temp[4])); + + ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W), + ureg_src(temp[3]), ureg_src(temp[2])); + + ureg_MOV(ureg, *out, ureg_src(temp[1])); +} + +static INLINE void +blend_lighten( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst 
*temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); + EXTENDED_BLENDER_OVER_FUNC + ureg_MUL(ureg, temp[4], ureg_src(temp[0]), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_MUL(ureg, temp[5], ureg_src(temp[1]), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); + ureg_MAX(ureg, temp[4], ureg_src(temp[4]), ureg_src(temp[5])); + ureg_ADD(ureg, temp[1], ureg_src(temp[3]), ureg_src(temp[4])); + + ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W), + ureg_src(temp[3]), ureg_src(temp[2])); + + ureg_MOV(ureg, *out, ureg_src(temp[1])); +} + +static INLINE void +premultiply( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_MUL(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ), + ureg_src(temp[0]), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); +} + +static INLINE void +unpremultiply( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[0], TGSI_TEXTURE_2D, in[0], sampler[1]); +} + + +static INLINE void +color_bw( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_ADD(ureg, temp[1], + ureg_scalar(constant[1], TGSI_SWIZZLE_Y), + ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); + ureg_RCP(ureg, temp[2], ureg_src(temp[1])); + ureg_ADD(ureg, temp[1], + ureg_scalar(constant[1], TGSI_SWIZZLE_Y), + ureg_src(temp[2])); + ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X), + ureg_scalar(ureg_src(temp[0]), 
TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Y)); + ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Z), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X)); + ureg_SGE(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ), + ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_X), + ureg_src(temp[1])); + ureg_SGE(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_W), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_Y)); + ureg_MOV(ureg, *out, ureg_src(temp[0])); +} struct shader_asm_info { VGint id; - VGint num_tokens; - const char * txt; + ureg_func func; VGboolean needs_position; @@ -203,44 +412,44 @@ struct shader_asm_info { static const struct shader_asm_info shaders_asm[] = { /* fills */ - {VEGA_SOLID_FILL_SHADER, 40, solid_fill_asm, + {VEGA_SOLID_FILL_SHADER, solid_fill, VG_FALSE, 0, 1, 0, 0, 0, 0}, - {VEGA_LINEAR_GRADIENT_SHADER, 200, linear_grad_asm, + {VEGA_LINEAR_GRADIENT_SHADER, linear_grad, VG_TRUE, 0, 5, 0, 1, 0, 5}, - {VEGA_RADIAL_GRADIENT_SHADER, 200, radial_grad_asm, + {VEGA_RADIAL_GRADIENT_SHADER, radial_grad, VG_TRUE, 0, 5, 0, 1, 0, 6}, - {VEGA_PATTERN_SHADER, 100, pattern_asm, + {VEGA_PATTERN_SHADER, pattern, VG_TRUE, 1, 4, 0, 1, 0, 5}, /* image draw modes */ - {VEGA_IMAGE_NORMAL_SHADER, 200, image_normal_asm, + {VEGA_IMAGE_NORMAL_SHADER, image_normal, VG_TRUE, 0, 0, 3, 1, 0, 0}, - {VEGA_IMAGE_MULTIPLY_SHADER, 200, image_multiply_asm, + {VEGA_IMAGE_MULTIPLY_SHADER, image_multiply, VG_TRUE, 0, 0, 3, 1, 0, 2}, - {VEGA_IMAGE_STENCIL_SHADER, 200, image_stencil_asm, + {VEGA_IMAGE_STENCIL_SHADER, image_stencil, VG_TRUE, 0, 0, 3, 1, 0, 2}, - {VEGA_MASK_SHADER, 100, mask_asm, + {VEGA_MASK_SHADER, mask, VG_TRUE, 0, 0, 1, 1, 0, 2}, /* extra blend modes */ - {VEGA_BLEND_MULTIPLY_SHADER, 200, blend_multiply_asm, + {VEGA_BLEND_MULTIPLY_SHADER, blend_multiply, VG_TRUE, 1, 1, 2, 1, 0, 5}, - {VEGA_BLEND_SCREEN_SHADER, 200, blend_screen_asm, + 
{VEGA_BLEND_SCREEN_SHADER, blend_screen, VG_TRUE, 0, 0, 2, 1, 0, 4}, - {VEGA_BLEND_DARKEN_SHADER, 200, blend_darken_asm, + {VEGA_BLEND_DARKEN_SHADER, blend_darken, VG_TRUE, 1, 1, 2, 1, 0, 6}, - {VEGA_BLEND_LIGHTEN_SHADER, 200, blend_lighten_asm, + {VEGA_BLEND_LIGHTEN_SHADER, blend_lighten, VG_TRUE, 1, 1, 2, 1, 0, 6}, /* premultiply */ - {VEGA_PREMULTIPLY_SHADER, 100, premultiply_asm, + {VEGA_PREMULTIPLY_SHADER, premultiply, VG_FALSE, 0, 0, 0, 0, 0, 1}, - {VEGA_UNPREMULTIPLY_SHADER, 100, unpremultiply_asm, + {VEGA_UNPREMULTIPLY_SHADER, unpremultiply, VG_FALSE, 0, 0, 0, 0, 0, 1}, /* color transform to black and white */ - {VEGA_BW_SHADER, 150, color_bw_asm, + {VEGA_BW_SHADER, color_bw, VG_FALSE, 1, 1, 0, 0, 0, 3}, }; #endif diff --git a/src/gallium/state_trackers/vega/shaders_cache.c b/src/gallium/state_trackers/vega/shaders_cache.c index 593e60fc63a..f43fe6ee4cb 100644 --- a/src/gallium/state_trackers/vega/shaders_cache.c +++ b/src/gallium/state_trackers/vega/shaders_cache.c @@ -123,17 +123,23 @@ static INLINE VGint range_max(VGint max, VGint current) return MAX2(max, current); } -static void -create_preamble(char *txt, - const struct shader_asm_info *shaders[SHADER_STAGES], - int num_shaders) +static void * +combine_shaders(const struct shader_asm_info *shaders[SHADER_STAGES], int num_shaders, + struct pipe_context *pipe, + struct pipe_shader_state *shader) { VGboolean declare_input = VG_FALSE; VGint start_const = -1, end_const = 0; VGint start_temp = -1, end_temp = 0; VGint start_sampler = -1, end_sampler = 0; - VGint i; + VGint i, current_shader = 0; VGint num_consts, num_temps, num_samplers; + struct ureg_program *ureg; + struct ureg_src in[2]; + struct ureg_src *sampler = NULL; + struct ureg_src *constant = NULL; + struct ureg_dst out, *temp = NULL; + void *p = NULL; for (i = 0; i < num_shaders; ++i) { if (shaders[i]->num_consts) @@ -158,99 +164,94 @@ create_preamble(char *txt, if (start_temp < 0) start_temp = 0; if (start_sampler < 0) - start_sampler = 0; + 
start_sampler = 0; num_consts = end_const - start_const; num_temps = end_temp - start_temp; num_samplers = end_sampler - start_sampler; - /* end exclusive */ - --end_const; - --end_temp; - --end_sampler; - sprintf(txt, "FRAG\n"); + ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!ureg) + return NULL; if (declare_input) { - sprintf(txt + strlen(txt), "DCL IN[0], POSITION, LINEAR\n"); - sprintf(txt + strlen(txt), "DCL IN[1], GENERIC[0], PERSPECTIVE\n"); + in[0] = ureg_DECL_fs_input(ureg, + TGSI_SEMANTIC_POSITION, + 0, + TGSI_INTERPOLATE_LINEAR); + in[1] = ureg_DECL_fs_input(ureg, + TGSI_SEMANTIC_GENERIC, + 0, + TGSI_INTERPOLATE_PERSPECTIVE); } /* we always have a color output */ - sprintf(txt + strlen(txt), "DCL OUT[0], COLOR, CONSTANT\n"); - - if (num_consts > 1) - sprintf(txt + strlen(txt), "DCL CONST[%d..%d], CONSTANT\n", start_const, end_const); - else if (num_consts == 1) - sprintf(txt + strlen(txt), "DCL CONST[%d], CONSTANT\n", start_const); - - if (num_temps > 1) - sprintf(txt + strlen(txt), "DCL TEMP[%d..%d], CONSTANT\n", start_temp, end_temp); - else if (num_temps > 1) - sprintf(txt + strlen(txt), "DCL TEMP[%d], CONSTANT\n", start_temp); - - if (num_samplers > 1) - sprintf(txt + strlen(txt), "DCL SAMP[%d..%d], CONSTANT\n", start_sampler, end_sampler); - else if (num_samplers == 1) - sprintf(txt + strlen(txt), "DCL SAMP[%d], CONSTANT\n", start_sampler); -} + out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); -static void * -combine_shaders(const struct shader_asm_info *shaders[SHADER_STAGES], int num_shaders, - struct pipe_context *pipe, - struct pipe_shader_state *shader) -{ - char *combined_txt; - int combined_len = MAX_PREAMBLE; - int combined_tokens = 0; - int i = 0; - int current_shader = 0; - int current_len; + if (num_consts >= 1) { + constant = (struct ureg_src *) malloc(sizeof(struct ureg_src) * end_const); + for (i = start_const; i < end_const; i++) { + constant[i] = ureg_DECL_constant(ureg, i); + } - for (i = 0; i < num_shaders; ++i) { - 
combined_len += strlen(shaders[i]->txt); - combined_tokens += shaders[i]->num_tokens; } - /* add for the %s->TEMP[0] substitutions */ - combined_len += num_shaders * 7 /*TEMP[0]*/ + 4 /*"END\n"*/; - combined_txt = (char*)malloc(combined_len); - combined_txt[0] = '\0'; + if (num_temps >= 1) { + temp = (struct ureg_dst *) malloc(sizeof(struct ureg_dst) * end_temp); + for (i = start_temp; i < end_temp; i++) { + temp[i] = ureg_DECL_temporary(ureg); + } + } - create_preamble(combined_txt, shaders, num_shaders); + if (num_samplers >= 1) { + sampler = (struct ureg_src *) malloc(sizeof(struct ureg_src) * end_sampler); + for (i = start_sampler; i < end_sampler; i++) { + sampler[i] = ureg_DECL_sampler(ureg, i); + } + } while (current_shader < num_shaders) { - const char temp[] = "TEMP[0]"; - const char out[] = "OUT[0]"; - const char *subst = temp; - - current_len = strlen(combined_txt); - - /* if the last shader then output */ - if (current_shader + 1 == num_shaders) - subst = out; - - snprintf(combined_txt + current_len, - combined_len - current_len, - shaders[current_shader]->txt, - subst); - ++current_shader; + if ((current_shader + 1) == num_shaders) { + shaders[current_shader]->func(ureg, + &out, + in, + sampler, + temp, + constant); + } else { + shaders[current_shader]->func(ureg, + &temp[0], + in, + sampler, + temp, + constant); + } + current_shader++; } + ureg_END(ureg); - current_len = strlen(combined_txt); - snprintf(combined_txt + current_len, - combined_len - current_len, - "END\n"); + shader->tokens = ureg_finalize(ureg); + if (shader->tokens) + p = pipe->create_fs_state(pipe, shader); - debug_printf("Combined shader is : \n%s\n", - combined_txt); + /* If finalize failed p stays NULL; fall through so ureg and the arrays are still freed. Temporaries are released before ureg_destroy(). */ + if (num_temps >= 1) { - shader->tokens = tokens_from_assembly( - combined_txt, combined_tokens); + for (i = start_temp; i < end_temp; i++) { + ureg_release_temporary(ureg, temp[i]); + } + } + ureg_destroy(ureg); - free(combined_txt); + if (temp) + free(temp); + if (constant) + free(constant); + if 
(sampler) + free(sampler); - return pipe->create_fs_state(pipe, shader); + return p; } static void * diff --git a/src/mesa/drivers/directfb/idirectfbgl_mesa.c b/src/mesa/drivers/directfb/idirectfbgl_mesa.c index 62a3269d171..85a6f036724 100644 --- a/src/mesa/drivers/directfb/idirectfbgl_mesa.c +++ b/src/mesa/drivers/directfb/idirectfbgl_mesa.c @@ -813,7 +813,7 @@ directfbgl_create_context( GLcontext *context, { struct dd_function_table functions; - _mesa_initialize_framebuffer( framebuffer, visual ); + _mesa_initialize_window_framebuffer( framebuffer, visual ); _mesa_init_driver_functions( &functions ); functions.GetString = dfbGetString; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e8cc202f889..c9ef1647a32 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -202,7 +202,7 @@ intelCreateBuffer(__DRIscreen * driScrnPriv, if (!fb) return GL_FALSE; - _mesa_initialize_framebuffer(fb, mesaVis); + _mesa_initialize_window_framebuffer(fb, mesaVis); if (mesaVis->redBits == 5) rgbFormat = MESA_FORMAT_RGB565; diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c index 249c0bbc11d..aecba7f8949 100644 --- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c +++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c @@ -74,7 +74,7 @@ static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, GLvoid *data, int stride, int count) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - uint32_t *out; + GLfloat *out; int i; int size = 1; @@ -91,7 +91,7 @@ static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, aos->count = count; radeon_bo_map(aos->bo, 1); - out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); + out = (GLfloat*)((char*)aos->bo->ptr + aos->offset); for (i = 0; i < count; i++) { out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data ); out++; diff --git 
a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index aa69b0fc72b..928c15e1e40 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -297,7 +297,7 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) if (flags[0] != 0) { sprintf(tmp, "o%i.%s", (code->alu.inst[i]. - rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + rgb_addr >> 29) & 3, flags); strcat(dstc, tmp); } @@ -311,7 +311,7 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", (code->alu.inst[i]. - alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + alpha_addr >> 25) & 3); strcat(dsta, tmp); } if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 375838d98e7..cc552aee176 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -176,7 +176,9 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + code->alu.inst[ip].rgb_addr |= + (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | + R300_RGB_TARGET(inst->RGB.Target); emit->node_flags |= R300_RGBA_OUT; } @@ -187,7 +189,8 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { - code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | + R300_ALPHA_TARGET(inst->Alpha.Target); emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.DepthWriteMask) { diff --git 
a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 5581f25352d..c2d5dc27b49 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -35,7 +35,10 @@ static void dataflow_outputs_mark_use(void * userdata, void * data, void (*callback)(void *, unsigned int, unsigned int)) { struct r300_fragment_program_compiler * c = userdata; - callback(data, c->OutputColor, RC_MASK_XYZW); + callback(data, c->OutputColor[0], RC_MASK_XYZW); + callback(data, c->OutputColor[1], RC_MASK_XYZW); + callback(data, c->OutputColor[2], RC_MASK_XYZW); + callback(data, c->OutputColor[3], RC_MASK_XYZW); callback(data, c->OutputDepth, RC_MASK_W); } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index b1b14394b6e..c2eb613b23f 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -241,6 +241,9 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + if (inst->WriteALUResult) { code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index f27f858652e..6bfda0574f6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -83,8 +83,10 @@ struct r300_fragment_program_compiler { struct rX00_fragment_program_code *code; struct r300_fragment_program_external_state state; unsigned is_r500; + /* Register corresponding to the 
depthbuffer. */ unsigned OutputDepth; - unsigned OutputColor; + /* Registers corresponding to the four colorbuffers. */ + unsigned OutputColor[4]; void * UserData; void (*AllocateHwInputs)( diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index 72117682725..fff5b0c2173 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -203,12 +203,21 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, /* Destination handling */ if (inst->DstReg.File == RC_FILE_OUTPUT) { - if (inst->DstReg.Index == c->OutputColor) { - pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; - pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == c->OutputDepth) { - pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } + if (inst->DstReg.Index == c->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else { + for (i = 0; i < 4; i++) { + if (inst->DstReg.Index == c->OutputColor[i]) { + pair->RGB.Target = i; + pair->Alpha.Target = i; + pair->RGB.OutputWriteMask |= + inst->DstReg.WriteMask & RC_MASK_XYZ; + pair->Alpha.OutputWriteMask |= + GET_BIT(inst->DstReg.WriteMask, 3); + break; + } + } + } } else { if (needrgb) { pair->RGB.DestIndex = inst->DstReg.Index; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 6685ade3ea8..511cc707a38 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -60,6 +60,7 @@ struct radeon_pair_instruction_rgb { unsigned int Opcode:8; unsigned int DestIndex:RC_REGISTER_INDEX_BITS; unsigned int WriteMask:3; + unsigned int Target:2; unsigned int OutputWriteMask:3; unsigned int Saturate:1; @@ -77,6 +78,7 @@ struct 
radeon_pair_instruction_alpha { unsigned int Opcode:8; unsigned int DestIndex:RC_REGISTER_INDEX_BITS; unsigned int WriteMask:1; + unsigned int Target:2; unsigned int OutputWriteMask:1; unsigned int DepthWriteMask:1; unsigned int Saturate:1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index d863b82d53f..28fb9eae925 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -229,7 +229,7 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst (inst->RGB.WriteMask & 2) ? "y" : "", (inst->RGB.WriteMask & 4) ? "z" : ""); if (inst->RGB.OutputWriteMask) - fprintf(f, " color.%s%s%s", + fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, (inst->RGB.OutputWriteMask & 1) ? "x" : "", (inst->RGB.OutputWriteMask & 2) ? "y" : "", (inst->RGB.OutputWriteMask & 4) ? "z" : ""); @@ -255,7 +255,7 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst if (inst->Alpha.WriteMask) fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); if (inst->Alpha.OutputWriteMask) - fprintf(f, " color.w"); + fprintf(f, " color[%i].w", inst->Alpha.Target); if (inst->Alpha.DepthWriteMask) fprintf(f, " depth.w"); if (inst->WriteALUResult == RC_ALURESULT_W) diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index e24c7955d4b..54ac2510e7a 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -114,7 +114,7 @@ static void create_fragment_program(struct r300_context *r300) inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0); - compiler.OutputColor = FRAG_RESULT_COLOR; + compiler.OutputColor[0] = FRAG_RESULT_COLOR; compiler.OutputDepth = FRAG_RESULT_DEPTH; compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515); compiler.code = 
&r300->blit.fp_code; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index e41aeff91a4..a0e2dd3c09f 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -221,7 +221,8 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog compiler.state = fp->state; compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE; compiler.OutputDepth = FRAG_RESULT_DEPTH; - compiler.OutputColor = FRAG_RESULT_COLOR; + memset(compiler.OutputColor, 0, 4 * sizeof(unsigned)); + compiler.OutputColor[0] = FRAG_RESULT_COLOR; compiler.AllocateHwInputs = &allocate_hw_inputs; if (compiler.Base.Debug) { diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 6fa1a0663ba..93b6399a669 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1479,7 +1479,7 @@ radeonCreateBuffer( __DRIscreen *driScrnPriv, if (!rfb) return GL_FALSE; - _mesa_initialize_framebuffer(&rfb->base, mesaVis); + _mesa_initialize_window_framebuffer(&rfb->base, mesaVis); if (mesaVis->redBits == 5) rgbFormat = _mesa_little_endian() ? 
MESA_FORMAT_RGB565 : MESA_FORMAT_RGB565_REV; diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index 8340861aff8..4e823669bfc 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -368,7 +368,7 @@ driCreateNewDrawable(__DRIscreen *screen, buf->row = _mesa_malloc(MAX_WIDTH * 4); /* basic framebuffer setup */ - _mesa_initialize_framebuffer(&buf->Base, &config->modes); + _mesa_initialize_window_framebuffer(&buf->Base, &config->modes); /* add front renderbuffer */ frontrb = swrast_new_renderbuffer(&config->modes, GL_TRUE); diff --git a/src/mesa/drivers/fbdev/glfbdev.c b/src/mesa/drivers/fbdev/glfbdev.c index 531558dc4d6..1a56b2395fa 100644 --- a/src/mesa/drivers/fbdev/glfbdev.c +++ b/src/mesa/drivers/fbdev/glfbdev.c @@ -626,7 +626,7 @@ glFBDevCreateBuffer( const struct fb_fix_screeninfo *fixInfo, return NULL; /* basic framebuffer setup */ - _mesa_initialize_framebuffer(&buf->glframebuffer, &visual->glvisual); + _mesa_initialize_window_framebuffer(&buf->glframebuffer, &visual->glvisual); /* add front renderbuffer */ frontrb = new_glfbdev_renderbuffer(frontBuffer, visual); _mesa_add_renderbuffer(&buf->glframebuffer, BUFFER_FRONT_LEFT, diff --git a/src/mesa/drivers/glslcompiler/Makefile b/src/mesa/drivers/glslcompiler/Makefile index fa8293d039a..080fe475c16 100644 --- a/src/mesa/drivers/glslcompiler/Makefile +++ b/src/mesa/drivers/glslcompiler/Makefile @@ -10,6 +10,7 @@ PROGRAM = glslcompiler OBJECTS = \ glslcompiler.o \ ../../glapi/glapi.o \ + ../../glapi/glapi_nop.o \ ../../glapi/glthread.o \ ../../main/dispatch.o \ ../common/driverfuncs.o \ diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c index ceeafd5f909..b24b758cfb2 100644 --- a/src/mesa/drivers/windows/gdi/wmesa.c +++ b/src/mesa/drivers/windows/gdi/wmesa.c @@ -35,7 +35,7 @@ wmesa_new_framebuffer(HDC hdc, GLvisual *visual) WMesaFramebuffer pwfb = (WMesaFramebuffer) malloc(sizeof(struct 
wmesa_framebuffer)); if (pwfb) { - _mesa_initialize_framebuffer(&pwfb->Base, visual); + _mesa_initialize_window_framebuffer(&pwfb->Base, visual); pwfb->hDC = hdc; /* insert at head of list */ pwfb->next = FirstFramebuffer; diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index bf767bceddf..1a5456e1be2 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -375,7 +375,7 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type, b->type = type; b->cmap = cmap; - _mesa_initialize_framebuffer(&b->mesa_buffer, &vis->mesa_visual); + _mesa_initialize_window_framebuffer(&b->mesa_buffer, &vis->mesa_visual); b->mesa_buffer.Delete = xmesa_delete_framebuffer; /* diff --git a/src/mesa/main/bitset.h b/src/mesa/main/bitset.h index 8bd4526cb6f..f2709abc9fd 100644 --- a/src/mesa/main/bitset.h +++ b/src/mesa/main/bitset.h @@ -27,7 +27,12 @@ * \brief Bitset of arbitrary size definitions. * \author Michal Krol */ - + +#ifndef BITSET_H +#define BITSET_H + +#include "imports.h" + /**************************************************************************** * generic bitset implementation */ @@ -74,6 +79,23 @@ ((x)[BITSET_BITWORD(b)] &= ~BITSET_RANGE(b, e)) : \ (assert (!"BITSET_CLEAR_RANGE: bit range crosses word boundary"), 0)) +/* Get first bit set in a bitset. 
+ */ +static INLINE int +__bitset_ffs(const BITSET_WORD *x, int n) +{ + int i; + + for (i = 0; i < n; i++) { + if (x[i]) + return _mesa_ffs(x[i]) + BITSET_WORDBITS * i; + } + + return 0; +} + +#define BITSET_FFS(x) __bitset_ffs(x, Elements(x)) + /**************************************************************************** * 64-bit bitset implementation */ @@ -120,3 +142,4 @@ ((x)[BITSET64_BITWORD(b)] &= ~BITSET64_RANGE(b, e)) : \ (assert (!"BITSET64_CLEAR_RANGE: bit range crosses word boundary"), 0)) +#endif diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index d958dbf7d48..96e53443836 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -88,7 +88,7 @@ _mesa_create_framebuffer(const GLvisual *visual) struct gl_framebuffer *fb = CALLOC_STRUCT(gl_framebuffer); assert(visual); if (fb) { - _mesa_initialize_framebuffer(fb, visual); + _mesa_initialize_window_framebuffer(fb, visual); } return fb; } @@ -109,15 +109,7 @@ _mesa_new_framebuffer(GLcontext *ctx, GLuint name) assert(name != 0); fb = CALLOC_STRUCT(gl_framebuffer); if (fb) { - fb->Name = name; - fb->RefCount = 1; - fb->_NumColorDrawBuffers = 1; - fb->ColorDrawBuffer[0] = GL_COLOR_ATTACHMENT0_EXT; - fb->_ColorDrawBufferIndexes[0] = BUFFER_COLOR0; - fb->ColorReadBuffer = GL_COLOR_ATTACHMENT0_EXT; - fb->_ColorReadBufferIndex = BUFFER_COLOR0; - fb->Delete = _mesa_destroy_framebuffer; - _glthread_INIT_MUTEX(fb->Mutex); + _mesa_initialize_user_framebuffer(fb, name); } return fb; } @@ -126,10 +118,11 @@ _mesa_new_framebuffer(GLcontext *ctx, GLuint name) /** * Initialize a gl_framebuffer object. Typically used to initialize * window system-created framebuffers, not user-created framebuffers. 
- * \sa _mesa_create_framebuffer + * \sa _mesa_initialize_user_framebuffer */ void -_mesa_initialize_framebuffer(struct gl_framebuffer *fb, const GLvisual *visual) +_mesa_initialize_window_framebuffer(struct gl_framebuffer *fb, + const GLvisual *visual) { assert(fb); assert(visual); @@ -167,6 +160,30 @@ _mesa_initialize_framebuffer(struct gl_framebuffer *fb, const GLvisual *visual) /** + * Initialize a user-created gl_framebuffer object. + * \sa _mesa_initialize_window_framebuffer + */ +void +_mesa_initialize_user_framebuffer(struct gl_framebuffer *fb, GLuint name) +{ + assert(fb); + assert(name); + + _mesa_bzero(fb, sizeof(struct gl_framebuffer)); + + fb->Name = name; + fb->RefCount = 1; + fb->_NumColorDrawBuffers = 1; + fb->ColorDrawBuffer[0] = GL_COLOR_ATTACHMENT0_EXT; + fb->_ColorDrawBufferIndexes[0] = BUFFER_COLOR0; + fb->ColorReadBuffer = GL_COLOR_ATTACHMENT0_EXT; + fb->_ColorReadBufferIndex = BUFFER_COLOR0; + fb->Delete = _mesa_destroy_framebuffer; + _glthread_INIT_MUTEX(fb->Mutex); +} + + +/** * Deallocate buffer and everything attached to it. * Typically called via the gl_framebuffer->Delete() method. 
*/ diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index ef21dd98e83..960513812cf 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -34,7 +34,11 @@ extern struct gl_framebuffer * _mesa_new_framebuffer(GLcontext *ctx, GLuint name); extern void -_mesa_initialize_framebuffer(struct gl_framebuffer *fb, const GLvisual *visual); +_mesa_initialize_window_framebuffer(struct gl_framebuffer *fb, + const GLvisual *visual); + +extern void +_mesa_initialize_user_framebuffer(struct gl_framebuffer *fb, GLuint name); extern void _mesa_destroy_framebuffer(struct gl_framebuffer *buffer); diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c index 9f9789e010c..54fd88ad4fb 100644 --- a/src/mesa/shader/prog_print.c +++ b/src/mesa/shader/prog_print.c @@ -150,6 +150,10 @@ arb_input_attrib_string(GLint index, GLenum progType) "fragment.varying[7]" }; + /* sanity checks */ + assert(strcmp(vertAttribs[VERT_ATTRIB_TEX0], "vertex.texcoord[0]") == 0); + assert(strcmp(vertAttribs[VERT_ATTRIB_GENERIC15], "vertex.attrib[15]") == 0); + if (progType == GL_VERTEX_PROGRAM_ARB) { assert(index < sizeof(vertAttribs) / sizeof(vertAttribs[0])); return vertAttribs[index]; @@ -162,6 +166,43 @@ arb_input_attrib_string(GLint index, GLenum progType) /** + * Print a vertex program's InputsRead field in human-readable format. + * For debugging. + */ +void +_mesa_print_vp_inputs(GLbitfield inputs) +{ + _mesa_printf("VP Inputs 0x%x: \n", inputs); + while (inputs) { + GLint attr = _mesa_ffs(inputs) - 1; + const char *name = arb_input_attrib_string(attr, + GL_VERTEX_PROGRAM_ARB); + _mesa_printf(" %d: %s\n", attr, name); + inputs &= ~(1 << attr); + } +} + + +/** + * Print a fragment program's InputsRead field in human-readable format. + * For debugging. 
+ */ +void +_mesa_print_fp_inputs(GLbitfield inputs) +{ + _mesa_printf("FP Inputs 0x%x: \n", inputs); + while (inputs) { + GLint attr = _mesa_ffs(inputs) - 1; + const char *name = arb_input_attrib_string(attr, + GL_FRAGMENT_PROGRAM_ARB); + _mesa_printf(" %d: %s\n", attr, name); + inputs &= ~(1 << attr); + } +} + + + +/** * Return ARB_v/f_prog-style output attrib string. */ static const char * diff --git a/src/mesa/shader/prog_print.h b/src/mesa/shader/prog_print.h index fc286ded540..9ab74560169 100644 --- a/src/mesa/shader/prog_print.h +++ b/src/mesa/shader/prog_print.h @@ -37,6 +37,12 @@ typedef enum { } gl_prog_print_mode; +extern void +_mesa_print_vp_inputs(GLbitfield inputs); + +extern void +_mesa_print_fp_inputs(GLbitfield inputs); + extern const char * _mesa_condcode_string(GLuint condcode); diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c index 3e86d0adad4..aaf5f96e2a5 100644 --- a/src/mesa/shader/program.c +++ b/src/mesa/shader/program.c @@ -677,6 +677,8 @@ _mesa_combine_programs(GLcontext *ctx, const GLuint lenB = progB->NumInstructions; const GLuint numParamsA = _mesa_num_parameters(progA->Parameters); const GLuint newLength = lenA + lenB; + GLboolean usedTemps[MAX_PROGRAM_TEMPS]; + GLuint firstTemp = 0; GLbitfield inputsB; GLuint i; @@ -698,6 +700,10 @@ _mesa_combine_programs(GLcontext *ctx, newProg->Instructions = newInst; newProg->NumInstructions = newLength; + /* find used temp regs (we may need new temps below) */ + _mesa_find_used_registers(newProg, PROGRAM_TEMPORARY, + usedTemps, MAX_PROGRAM_TEMPS); + if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprogA, *fprogB, *newFprog; GLbitfield progB_inputsRead = progB->InputsRead; @@ -741,12 +747,15 @@ _mesa_combine_programs(GLcontext *ctx, */ if ((progA->OutputsWritten & (1 << FRAG_RESULT_COLOR)) && (progB_inputsRead & FRAG_BIT_COL0)) { - GLint tempReg = _mesa_find_free_register(newProg, PROGRAM_TEMPORARY); + GLint tempReg = 
_mesa_find_free_register(usedTemps, MAX_PROGRAM_TEMPS, + firstTemp); if (tempReg < 0) { _mesa_problem(ctx, "No free temp regs found in " "_mesa_combine_programs(), using 31"); tempReg = 31; } + firstTemp = tempReg + 1; + /* replace writes to result.color[0] with tempReg */ replace_registers(newInst, lenA, PROGRAM_OUTPUT, FRAG_RESULT_COLOR, @@ -784,53 +793,64 @@ _mesa_combine_programs(GLcontext *ctx, } - - /** - * Scan the given program to find a free register of the given type. - * \param regFile - PROGRAM_INPUT, PROGRAM_OUTPUT or PROGRAM_TEMPORARY + * Populate the 'used' array with flags indicating which registers (TEMPs, + * INPUTs, OUTPUTs, etc, are used by the given program. + * \param file type of register to scan for + * \param used returns true/false flags for in use / free + * \param usedSize size of the 'used' array */ -GLint -_mesa_find_free_register(const struct gl_program *prog, GLuint regFile) +void +_mesa_find_used_registers(const struct gl_program *prog, + gl_register_file file, + GLboolean used[], GLuint usedSize) { - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i, k; - - assert(regFile == PROGRAM_INPUT || - regFile == PROGRAM_OUTPUT || - regFile == PROGRAM_TEMPORARY); + GLuint i, j; - _mesa_memset(used, 0, sizeof(used)); + _mesa_memset(used, 0, usedSize); for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); - /* check dst reg first */ - if (inst->DstReg.File == regFile) { + if (inst->DstReg.File == file) { used[inst->DstReg.Index] = GL_TRUE; } - else { - /* check src regs otherwise */ - for (k = 0; k < n; k++) { - if (inst->SrcReg[k].File == regFile) { - used[inst->SrcReg[k].Index] = GL_TRUE; - break; - } + + for (j = 0; j < n; j++) { + if (inst->SrcReg[j].File == file) { + used[inst->SrcReg[j].Index] = GL_TRUE; } } } +} - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + +/** + * Scan the given 'used' register flag array for the first entry + * 
that's >= firstReg. + * \param used vector of flags indicating registers in use (as returned + * by _mesa_find_used_registers()) + * \param usedSize size of the 'used' array + * \param firstReg first register to start searching at + * \return index of unused register, or -1 if none. + */ +GLint +_mesa_find_free_register(const GLboolean used[], + GLuint usedSize, GLuint firstReg) +{ + GLuint i; + + assert(firstReg < usedSize); + + for (i = firstReg; i < usedSize; i++) if (!used[i]) return i; - } return -1; } - /** * "Post-process" a GPU program. This is intended to be used for debugging. * Example actions include no-op'ing instructions or changing instruction diff --git a/src/mesa/shader/program.h b/src/mesa/shader/program.h index 56a4191f578..0187a2c55ff 100644 --- a/src/mesa/shader/program.h +++ b/src/mesa/shader/program.h @@ -119,8 +119,14 @@ _mesa_combine_programs(GLcontext *ctx, const struct gl_program *progA, const struct gl_program *progB); +extern void +_mesa_find_used_registers(const struct gl_program *prog, + gl_register_file file, + GLboolean used[], GLuint usedSize); + extern GLint -_mesa_find_free_register(const struct gl_program *prog, GLuint regFile); +_mesa_find_free_register(const GLboolean used[], + GLuint maxRegs, GLuint firstReg); extern void _mesa_postprocess_program(GLcontext *ctx, struct gl_program *prog); diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c index 9514545709d..fb2ebe6338f 100644 --- a/src/mesa/shader/programopt.c +++ b/src/mesa/shader/programopt.c @@ -495,6 +495,11 @@ _mesa_remove_output_reads(struct gl_program *prog, gl_register_file type) GLuint i; GLint outputMap[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; + GLboolean usedTemps[MAX_PROGRAM_TEMPS]; + GLuint firstTemp = 0; + + _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, + usedTemps, MAX_PROGRAM_TEMPS); assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); @@ -513,8 
+518,10 @@ _mesa_remove_output_reads(struct gl_program *prog, gl_register_file type) const GLuint var = inst->SrcReg[j].Index; if (outputMap[var] == -1) { numVaryingReads++; - outputMap[var] = _mesa_find_free_register(prog, - PROGRAM_TEMPORARY); + outputMap[var] = _mesa_find_free_register(usedTemps, + MAX_PROGRAM_TEMPS, + firstTemp); + firstTemp = outputMap[var] + 1; } inst->SrcReg[j].File = PROGRAM_TEMPORARY; inst->SrcReg[j].Index = outputMap[var]; diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index aa0508a41fc..d9b508537d0 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -60,26 +60,10 @@ static void translate_fp(struct st_context *st, struct st_fragment_program *stfp) { - const GLbitfield fragInputsRead = stfp->Base.Base.InputsRead; - if (!stfp->state.tokens) { - GLuint inAttr, numIn = 0; - - for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) { - if (fragInputsRead & (1 << inAttr)) { - stfp->input_to_slot[inAttr] = numIn; - numIn++; - } - else { - stfp->input_to_slot[inAttr] = -1; - } - } - - stfp->num_input_slots = numIn; - assert(stfp->Base.Base.NumInstructions > 0); - st_translate_fragment_program(st, stfp, stfp->input_to_slot); + st_translate_fragment_program(st, stfp); } } diff --git a/src/mesa/state_tracker/st_atom_viewport.c b/src/mesa/state_tracker/st_atom_viewport.c index 27ec2eb0331..b82bbfe410f 100644 --- a/src/mesa/state_tracker/st_atom_viewport.c +++ b/src/mesa/state_tracker/st_atom_viewport.c @@ -62,9 +62,9 @@ update_viewport( struct st_context *st ) GLfloat x = (GLfloat)ctx->Viewport.X; GLfloat y = (GLfloat)ctx->Viewport.Y; GLfloat z = ctx->Viewport.Near; - GLfloat half_width = (GLfloat)ctx->Viewport.Width / 2.0f; - GLfloat half_height = (GLfloat)ctx->Viewport.Height / 2.0f; - GLfloat half_depth = (GLfloat)(ctx->Viewport.Far - ctx->Viewport.Near) / 2.0f; + GLfloat half_width = (GLfloat)ctx->Viewport.Width * 0.5f; + GLfloat half_height = 
(GLfloat)ctx->Viewport.Height * 0.5f; + GLfloat half_depth = (GLfloat)(ctx->Viewport.Far - ctx->Viewport.Near) * 0.5f; st->state.viewport.scale[0] = half_width; st->state.viewport.scale[1] = half_height * yScale; diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index c70bbc880c0..85420a950f4 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -221,7 +221,7 @@ combined_bitmap_fragment_program(GLcontext *ctx) #endif /* translate to TGSI tokens */ - st_translate_fragment_program(st, stfp->bitmap_program, NULL); + st_translate_fragment_program(st, stfp->bitmap_program); } return stfp->bitmap_program; diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 9b1d2103f17..2a084ca5779 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -140,7 +140,7 @@ combined_drawpix_fragment_program(GLcontext *ctx) #endif /* translate to TGSI tokens */ - st_translate_fragment_program(st, stfp, NULL); + st_translate_fragment_program(st, stfp); /* save new program, update serial numbers */ st->pixel_xfer.xfer_prog_sn = st->pixel_xfer.program->serialNo; @@ -221,7 +221,7 @@ make_fragment_shader_z(struct st_context *st) p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ st->drawpix.z_shader = (struct st_fragment_program *) p; - st_translate_fragment_program(st, st->drawpix.z_shader, NULL); + st_translate_fragment_program(st, st->drawpix.z_shader); return st->drawpix.z_shader; } diff --git a/src/mesa/state_tracker/st_cb_strings.c b/src/mesa/state_tracker/st_cb_strings.c index 996e065fedc..0fcb427f30a 100644 --- a/src/mesa/state_tracker/st_cb_strings.c +++ b/src/mesa/state_tracker/st_cb_strings.c @@ -39,7 +39,7 @@ #include "st_context.h" #include "st_cb_strings.h" -#define ST_VERSION_STRING "0.3" +#define ST_VERSION_STRING "0.4" static const GLubyte * st_get_string(GLcontext * ctx, GLenum name) diff 
--git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 7c5664be1d8..13f050900a6 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -543,22 +543,15 @@ st_TexImage(GLcontext * ctx, _mesa_align_free(texImage->Data); } - if (width == 0 || height == 0 || depth == 0) { - /* stop after freeing old image */ - return; - } - - /* If this is the only mipmap level in the texture, could call - * bmBufferData with NULL data to free the old block and avoid - * waiting on any outstanding fences. + /* + * See if the new image is somehow incompatible with the existing + * mipmap. If so, free the old mipmap. */ if (stObj->pt) { if (stObj->teximage_realloc || level > (GLint) stObj->pt->last_level || - (stObj->pt->last_level == level && - stObj->pt->target != PIPE_TEXTURE_CUBE && - !st_texture_match_image(stObj->pt, &stImage->base, - stImage->face, stImage->level))) { + !st_texture_match_image(stObj->pt, &stImage->base, + stImage->face, stImage->level)) { DBG("release it\n"); pipe_texture_reference(&stObj->pt, NULL); assert(!stObj->pt); @@ -566,6 +559,11 @@ st_TexImage(GLcontext * ctx, } } + if (width == 0 || height == 0 || depth == 0) { + /* stop after freeing old image */ + return; + } + if (!stObj->pt) { guess_and_alloc_texture(ctx->st, stObj, stImage); if (!stObj->pt) { diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c index ed9c0ff5b76..4e225a123c8 100644 --- a/src/mesa/state_tracker/st_framebuffer.c +++ b/src/mesa/state_tracker/st_framebuffer.c @@ -54,7 +54,7 @@ st_create_framebuffer( const __GLcontextModes *visual, if (visual->sampleBuffers) samples = visual->samples; - _mesa_initialize_framebuffer(&stfb->Base, visual); + _mesa_initialize_window_framebuffer(&stfb->Base, visual); if (visual->doubleBufferMode) { struct gl_renderbuffer *rb diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 
5c87e47ca3d..a639003dbd0 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -269,24 +269,20 @@ fail: /** * Translate a Mesa fragment shader into a TGSI shader. - * \param inputMapping to map fragment program input registers to TGSI - * input slots * \return pointer to cached pipe_shader object. */ void st_translate_fragment_program(struct st_context *st, - struct st_fragment_program *stfp, - const GLuint inputMapping[]) + struct st_fragment_program *stfp ) { struct pipe_context *pipe = st->pipe; GLuint outputMapping[FRAG_RESULT_MAX]; - GLuint defaultInputMapping[FRAG_ATTRIB_MAX]; + GLuint inputMapping[FRAG_ATTRIB_MAX]; GLuint interpMode[16]; /* XXX size? */ GLuint attr; enum pipe_error error; const GLbitfield inputsRead = stfp->Base.Base.InputsRead; struct ureg_program *ureg; - GLuint vslot = 0; uint fs_num_inputs = 0; @@ -294,24 +290,14 @@ st_translate_fragment_program(struct st_context *st, ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; uint fs_num_outputs = 0; - /* which vertex output goes to the first fragment input: */ - if (inputsRead & FRAG_BIT_WPOS) - vslot = 0; - else - vslot = 1; - /* * Convert Mesa program inputs to TGSI input register semantics. 
*/ for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { if (inputsRead & (1 << attr)) { - const GLuint slot = fs_num_inputs; - - defaultInputMapping[attr] = slot; - - stfp->input_map[slot] = vslot++; + const GLuint slot = fs_num_inputs++; - fs_num_inputs++; + inputMapping[attr] = slot; switch (attr) { case FRAG_ATTRIB_WPOS: @@ -376,6 +362,9 @@ st_translate_fragment_program(struct st_context *st, break; } } + else { + inputMapping[attr] = -1; + } } /* @@ -417,9 +406,6 @@ st_translate_fragment_program(struct st_context *st, } } - if (!inputMapping) - inputMapping = defaultInputMapping; - ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) return; diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 6b9a9226df5..d9822e50f55 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -52,12 +52,6 @@ struct st_fragment_program struct gl_fragment_program Base; GLuint serialNo; - GLuint input_to_slot[FRAG_ATTRIB_MAX]; /**< Maps FRAG_ATTRIB_x to slot */ - GLuint num_input_slots; - - /** map FP input back to VP output */ - GLuint input_map[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; @@ -171,8 +165,7 @@ st_reference_fragprog(struct st_context *st, extern void st_translate_fragment_program(struct st_context *st, - struct st_fragment_program *fp, - const GLuint inputMapping[]); + struct st_fragment_program *fp); /* Called after program string change, discard all previous diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c index da84eaa6ead..2fc866c5773 100644 --- a/src/mesa/vbo/vbo_split_inplace.c +++ b/src/mesa/vbo/vbo_split_inplace.c @@ -30,12 +30,15 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/enums.h" +#include "main/image.h" #include "vbo_split.h" #define MAX_PRIM 32 -/* Used for splitting without copying. +/* Used for splitting without copying. 
No attempt is made to handle + * too large indexed vertex buffers: In general you need to copy to do + * that. */ struct split_context { GLcontext *ctx; @@ -48,6 +51,7 @@ struct split_context { vbo_draw_func draw; const struct split_limits *limits; + GLuint limit; struct _mesa_prim dstprim[MAX_PRIM]; GLuint dstprim_nr; @@ -58,38 +62,38 @@ struct split_context { static void flush_vertex( struct split_context *split ) { - GLuint min_index, max_index; + struct _mesa_index_buffer ib; GLuint i; if (!split->dstprim_nr) return; - min_index = split->dstprim[0].start; - max_index = min_index + split->dstprim[0].count - 1; + if (split->ib) { + ib = *split->ib; - for (i = 1; i < split->dstprim_nr; i++) { - GLuint tmp_min = split->dstprim[i].start; - GLuint tmp_max = tmp_min + split->dstprim[i].count - 1; + ib.count = split->max_index - split->min_index + 1; + ib.ptr = (const void *)((const char *)ib.ptr + + split->min_index * _mesa_sizeof_type(ib.type)); - if (tmp_min < min_index) - min_index = tmp_min; - - if (tmp_max > max_index) - max_index = tmp_max; + /* Rebase the primitives to save index buffer entries. */ + for (i = 0; i < split->dstprim_nr; i++) + split->dstprim[i].start -= split->min_index; } - assert(max_index >= min_index); + assert(split->max_index >= split->min_index); - split->draw( split->ctx, - split->array, - split->dstprim, - split->dstprim_nr, - NULL, - GL_TRUE, - min_index, - max_index); + split->draw(split->ctx, + split->array, + split->dstprim, + split->dstprim_nr, + split->ib ? 
&ib : NULL, + !split->ib, + split->min_index, + split->max_index); split->dstprim_nr = 0; + split->min_index = ~0; + split->max_index = 0; } @@ -106,62 +110,67 @@ static struct _mesa_prim *next_outprim( struct split_context *split ) } } -static int align(int value, int alignment) +static void update_index_bounds(struct split_context *split, + const struct _mesa_prim *prim) { - return (value + alignment - 1) & ~(alignment - 1); + split->min_index = MIN2(split->min_index, prim->start); + split->max_index = MAX2(split->max_index, prim->start + prim->count - 1); } - +/* Return the maximum amount of vertices that can be emitted for a + * primitive starting at 'prim->start', depending on the previous + * index bounds. + */ +static GLuint get_max_vertices(struct split_context *split, + const struct _mesa_prim *prim) +{ + if ((prim->start > split->min_index && + prim->start - split->min_index >= split->limit) || + (prim->start < split->max_index && + split->max_index - prim->start >= split->limit)) + /* "prim" starts too far away from the old range. */ + return 0; + + return MIN2(split->min_index, prim->start) + split->limit - prim->start; +} /* Break large primitives into smaller ones. If not possible, convert * the primitive to indexed and pass to split_elts(). */ static void split_prims( struct split_context *split) { - GLuint csr = 0; GLuint i; for (i = 0; i < split->nr_prims; i++) { const struct _mesa_prim *prim = &split->prim[i]; GLuint first, incr; GLboolean split_inplace = split_prim_inplace(prim->mode, &first, &incr); - GLuint count; - - /* Always wrap on an even numbered vertex to avoid problems with - * triangle strips. 
- */ - GLuint available = align(split->limits->max_verts - csr - 1, 2); - assert(split->limits->max_verts >= csr); + GLuint available = get_max_vertices(split, prim); + GLuint count = prim->count - (prim->count - first) % incr; if (prim->count < first) continue; - - count = prim->count - (prim->count - first) % incr; - - if ((available < count && !split_inplace) || + if ((available < count && !split_inplace) || (available < first && split_inplace)) { flush_vertex(split); - csr = 0; - available = align(split->limits->max_verts - csr - 1, 2); + available = get_max_vertices(split, prim); } if (available >= count) { struct _mesa_prim *outprim = next_outprim(split); + *outprim = *prim; - csr += prim->count; - available = align(split->limits->max_verts - csr - 1, 2); - } + update_index_bounds(split, outprim); + } else if (split_inplace) { GLuint j, nr; - for (j = 0 ; j < count ; ) { GLuint remaining = count - j; struct _mesa_prim *outprim = next_outprim(split); nr = MIN2( available, remaining ); - nr -= (nr - first) % incr; outprim->mode = prim->mode; @@ -169,21 +178,20 @@ static void split_prims( struct split_context *split) outprim->end = (nr == remaining && prim->end); outprim->start = prim->start + j; outprim->count = nr; - + + update_index_bounds(split, outprim); + if (nr == remaining) { /* Finished. */ - j += nr; - csr += nr; - available = align(split->limits->max_verts - csr - 1, 2); + j += nr; } else { /* Wrapped the primitive: */ j += nr - (first - incr); flush_vertex(split); - csr = 0; - available = align(split->limits->max_verts - csr - 1, 2); + available = get_max_vertices(split, prim); } } } @@ -260,10 +268,14 @@ void vbo_split_inplace( GLcontext *ctx, split.prim = prim; split.nr_prims = nr_prims; split.ib = ib; - split.min_index = min_index; - split.max_index = max_index; + + /* Empty interval, makes calculations simpler. */ + split.min_index = ~0; + split.max_index = 0; + split.draw = draw; split.limits = limits; + split.limit = ib ? 
limits->max_indices : limits->max_verts; split_prims( &split ); } |