diff options
author | Ben Skeggs <[email protected]> | 2008-03-19 22:51:17 +1100 |
---|---|---|
committer | Ben Skeggs <[email protected]> | 2008-03-19 22:51:17 +1100 |
commit | 62767cf2dd1006621ecd6023b15d65b5cff41dfa (patch) | |
tree | fe287d4a281884467531d6ac53a29539f1fcd18a | |
parent | 176df85568992a5d99aab7f0b1e382d41459aa13 (diff) | |
parent | ec890533c2852fa62366d449e6fbc899fb0498be (diff) |
Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
55 files changed, 4319 insertions, 592 deletions
diff --git a/progs/tests/Makefile b/progs/tests/Makefile index 7053ebc86aa..ea34a708553 100644 --- a/progs/tests/Makefile +++ b/progs/tests/Makefile @@ -51,6 +51,7 @@ SOURCES = \ manytex.c \ minmag.c \ mipmap_limits.c \ + mipmap_view.c \ multipal.c \ no_s3tc.c \ packedpixels.c \ @@ -158,6 +159,14 @@ invert: invert.o readtex.o invert.o: invert.c readtex.h $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ +mipmap_view: mipmap_view.o readtex.o + $(CC) $(CFLAGS) mipmap_view.o readtex.o $(LIBS) -o $@ + +mipmap_view.o: mipmap_view.c readtex.h + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + + + readtex.o: readtex.c $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ diff --git a/progs/tests/mipmap_view.c b/progs/tests/mipmap_view.c new file mode 100644 index 00000000000..d821f432f0b --- /dev/null +++ b/progs/tests/mipmap_view.c @@ -0,0 +1,241 @@ +/* + * Test mipmap generation and lod bias. + * + * Brian Paul + * 17 March 2008 + */ + + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <GL/glut.h> +#include <GL/glext.h> + +#include "readtex.h" + +#define TEXTURE_FILE "../images/arch.rgb" + +static int TexWidth = 256, TexHeight = 256; +static int WinWidth = 1044, WinHeight = 900; +static GLfloat Bias = 0.0; +static GLboolean ScaleQuads = GL_FALSE; + + +static void +PrintString(const char *s) +{ + while (*s) { + glutBitmapCharacter(GLUT_BITMAP_8_BY_13, (int) *s); + s++; + } +} + + +static void +Display(void) +{ + int x, y, bias; + char str[100]; + int texWidth = TexWidth, texHeight = TexHeight; + + glClear(GL_COLOR_BUFFER_BIT); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(0, WinWidth, 0, WinHeight, -1, 1); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + glColor3f(1,1,1); + + y = WinHeight - 300; + x = 4; + + for (bias = -1; bias < 11; bias++) { + + glRasterPos2f(x, y + TexHeight + 5); + sprintf(str, "Texture LOD Bias = %d", bias); + PrintString(str); + + glPushMatrix(); + glTranslatef(x, y, 0); + + 
glEnable(GL_TEXTURE_2D); + + if (ScaleQuads) { + if (bias > 0) { + texWidth = TexWidth >> bias; + texHeight = TexHeight >> bias; + if (texWidth < 1) + texWidth = 1; + if (texHeight < 1) + texHeight = 1; + } + glTexEnvf(GL_TEXTURE_FILTER_CONTROL_EXT, GL_TEXTURE_LOD_BIAS_EXT, 0.0); + } + else { + glTexEnvf(GL_TEXTURE_FILTER_CONTROL_EXT, GL_TEXTURE_LOD_BIAS_EXT, bias); + } + + glBegin(GL_POLYGON); + glTexCoord2f(0, 0); glVertex2f(0, 0); + glTexCoord2f(1, 0); glVertex2f(texWidth, 0); + glTexCoord2f(1, 1); glVertex2f(texWidth, texHeight); + glTexCoord2f(0, 1); glVertex2f(0, texHeight); + glEnd(); + + glPopMatrix(); + + glDisable(GL_TEXTURE_2D); + + x += TexWidth + 4; + if (x >= WinWidth) { + x = 4; + y -= 300; + } + } + + glutSwapBuffers(); +} + + +static void +Reshape(int width, int height) +{ + WinWidth = width; + WinHeight = height; + glViewport(0, 0, width, height); +} + + +static void +Key(unsigned char key, int x, int y) +{ + (void) x; + (void) y; + switch (key) { + case 'b': + Bias -= 10; + break; + case 'B': + Bias += 10; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + Bias = 100.0 * (key - '0'); + break; + case 's': + ScaleQuads = !ScaleQuads; + break; + case 27: + exit(0); + break; + } + glutPostRedisplay(); +} + + +static void +Init(void) +{ + GLfloat maxBias; + + if (!glutExtensionSupported("GL_EXT_texture_lod_bias")) { + printf("Sorry, GL_EXT_texture_lod_bias not supported by this renderer.\n"); + exit(1); + } + + if (!glutExtensionSupported("GL_SGIS_generate_mipmap")) { + printf("Sorry, GL_SGIS_generate_mipmap not supported by this renderer.\n"); + exit(1); + } + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + if (1) { + /* test auto mipmap generation */ + GLint width, height, i; + GLenum format; + GLubyte *image = LoadRGBImage(TEXTURE_FILE, &width, &height, &format); + if (!image) { + printf("Error: could not load texture image %s\n", TEXTURE_FILE); + exit(1); + } + /* resize 
to TexWidth x TexHeight */ + if (width != TexWidth || height != TexHeight) { + GLubyte *newImage = malloc(TexWidth * TexHeight * 4); + gluScaleImage(format, width, height, GL_UNSIGNED_BYTE, image, + TexWidth, TexHeight, GL_UNSIGNED_BYTE, newImage); + free(image); + image = newImage; + } + printf("Using GL_SGIS_generate_mipmap\n"); + glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP_SGIS, GL_TRUE); + glTexImage2D(GL_TEXTURE_2D, 0, format, TexWidth, TexHeight, 0, + format, GL_UNSIGNED_BYTE, image); + free(image); + + /* make sure mipmap was really generated correctly */ + width = TexWidth; + height = TexHeight; + for (i = 0; i < 9; i++) { + GLint w, h; + glGetTexLevelParameteriv(GL_TEXTURE_2D, i, GL_TEXTURE_WIDTH, &w); + glGetTexLevelParameteriv(GL_TEXTURE_2D, i, GL_TEXTURE_HEIGHT, &h); + printf("Level %d size: %d x %d\n", i, w, h); + assert(w == width); + assert(h == height); + width /= 2; + height /= 2; + } + } + else { + if (LoadRGBMipmaps(TEXTURE_FILE, GL_RGB)) { + printf("Using gluBuildMipmaps()\n"); + } + else { + printf("Error: could not load texture image %s\n", TEXTURE_FILE); + exit(1); + } + } + + + /* mipmapping required for this extension */ + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); + + glGetFloatv(GL_MAX_TEXTURE_LOD_BIAS_EXT, &maxBias); + + printf("GL_RENDERER: %s\n", (char*) glGetString(GL_RENDERER)); + printf("LOD bias range: [%g, %g]\n", -maxBias, maxBias); + + printf("Press 's' to toggle quad scaling\n"); +} + + +int +main(int argc, char *argv[]) +{ + glutInit(&argc, argv); + glutInitWindowPosition(0, 0); + glutInitWindowSize(WinWidth, WinHeight); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE); + glutCreateWindow(argv[0]); + glutReshapeFunc(Reshape); + glutKeyboardFunc(Key); + glutDisplayFunc(Display); + Init(); + glutMainLoop(); + return 0; +} diff --git 
a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index fd86bfaca9c..294ac82281e 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -149,6 +149,12 @@ void cso_set_blend(struct cso_context *ctx, } } +void cso_unset_blend(struct cso_context *ctx) +{ + ctx->blend = NULL; +} + + void cso_single_sampler(struct cso_context *ctx, unsigned idx, const struct pipe_sampler_state *templ) @@ -220,6 +226,15 @@ void cso_set_samplers( struct cso_context *ctx, cso_single_sampler_done( ctx ); } +void cso_unset_samplers( struct cso_context *ctx ) +{ + uint i; + for (i = 0; i < ctx->nr_samplers; i++) + ctx->samplers[i] = NULL; +} + + + void cso_set_depth_stencil_alpha(struct cso_context *ctx, const struct pipe_depth_stencil_alpha_state *templ) { @@ -252,6 +267,11 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx, } } +void cso_unset_depth_stencil_alpha(struct cso_context *ctx) +{ + ctx->depth_stencil = NULL; +} + void cso_set_rasterizer(struct cso_context *ctx, @@ -285,7 +305,10 @@ void cso_set_rasterizer(struct cso_context *ctx, } } - +void cso_unset_rasterizer(struct cso_context *ctx) +{ + ctx->rasterizer = NULL; +} @@ -320,6 +343,12 @@ void cso_set_fragment_shader(struct cso_context *ctx, } } +void cso_unset_fragment_shader(struct cso_context *ctx) +{ + ctx->fragment_shader = NULL; +} + + void cso_set_vertex_shader(struct cso_context *ctx, const struct pipe_shader_state *templ) { @@ -350,3 +379,8 @@ void cso_set_vertex_shader(struct cso_context *ctx, ctx->pipe->bind_vs_state(ctx->pipe, handle); } } + +void cso_unset_vertex_shader(struct cso_context *ctx) +{ + ctx->vertex_shader = NULL; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 1f2a6308043..6aa619abf56 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -44,16 +44,25 @@ struct 
cso_context *cso_create_context( struct pipe_context *pipe ); void cso_set_blend( struct cso_context *cso, const struct pipe_blend_state *blend ); +void cso_unset_blend(struct cso_context *cso); + void cso_set_depth_stencil_alpha( struct cso_context *cso, const struct pipe_depth_stencil_alpha_state *dsa ); +void cso_unset_depth_stencil_alpha( struct cso_context *cso ); + void cso_set_rasterizer( struct cso_context *cso, const struct pipe_rasterizer_state *rasterizer ); +void cso_unset_rasterizer( struct cso_context *cso ); + void cso_set_samplers( struct cso_context *cso, unsigned count, const struct pipe_sampler_state **states ); +void cso_unset_samplers( struct cso_context *cso ); + + /* Alternate interface to support state trackers that like to modify * samplers one at a time: */ @@ -72,9 +81,13 @@ void cso_single_sampler_done( struct cso_context *cso ); void cso_set_fragment_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); +void cso_unset_fragment_shader( struct cso_context *cso ); + void cso_set_vertex_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); +void cso_unset_vertex_shader( struct cso_context *cso ); + void cso_destroy_context( struct cso_context *cso ); diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c index d16f056191c..fdec6a591b8 100644 --- a/src/gallium/auxiliary/draw/draw_passthrough.c +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -85,14 +85,52 @@ fetch_store_general( struct draw_context *draw, const unsigned *pitch = draw->vertex_fetch.pitch; const ubyte **src = draw->vertex_fetch.src_ptr; - for (i = start; i < count; i++) { + for (i = start; i < start + count; i++) { for (j = 0; j < nr_attrs; j++) { + /* vinfo->src_index is the output of the vertex shader + * matching this hw-vertex component. 
+ * + * In passthrough, we require a 1:1 mapping between vertex + * shader outputs and inputs, which in turn correspond to + * vertex elements in the state. So, this is the vertex + * element we're interested in... + */ const uint jj = vinfo->src_index[j]; const enum pipe_format srcFormat = draw->vertex_element[jj].src_format; const ubyte *from = src[jj] + i * pitch[jj]; float attrib[4]; + /* Except... When we're not. Two cases EMIT_HEADER & + * EMIT_1F_PSIZE don't consume an input. Should have some + * method for indicating this, or change the logic here + * somewhat so it doesn't matter. + * + * Just hack this up now, do something better about it later. + */ + if (vinfo->emit[j] == EMIT_HEADER) { + memset(out, 0, sizeof(struct vertex_header)); + out += sizeof(struct vertex_header) / 4; + continue; + } + else if (vinfo->emit[j] == EMIT_1F_PSIZE) { + out[0] = 1.0; /* xxx */ + out += 1; + continue; + } + + + /* The normal fetch/emit code: + */ switch (srcFormat) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + ubyte *ub = (ubyte *) from; + attrib[0] = UBYTE_TO_FLOAT(ub[0]); + attrib[1] = UBYTE_TO_FLOAT(ub[1]); + attrib[2] = UBYTE_TO_FLOAT(ub[2]); + attrib[3] = UBYTE_TO_FLOAT(ub[3]); + } + break; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *f = (float *) from; @@ -130,14 +168,21 @@ fetch_store_general( struct draw_context *draw, } break; default: - abort(); + assert(0); } - /* XXX this will probably only work for softpipe */ + debug_printf("attrib %d: %f %f %f %f\n", j, + attrib[0], attrib[1], attrib[2], attrib[3]); + switch (vinfo->emit[j]) { - case EMIT_HEADER: - memset(out, 0, sizeof(struct vertex_header)); - out += sizeof(struct vertex_header) / 4; + case EMIT_1F: + out[0] = attrib[0]; + out += 1; + break; + case EMIT_2F: + out[0] = attrib[0]; + out[1] = attrib[1]; + out += 2; break; case EMIT_4F: out[0] = attrib[0]; @@ -147,64 +192,15 @@ fetch_store_general( struct draw_context *draw, out += 4; break; default: - abort(); + assert(0); } - } + debug_printf("\n"); } } 
-/* Example of a fetch/emit passthrough shader which could be - * generated when bypass_clipping is enabled on a passthrough vertex - * shader. - */ -static void fetch_xyz_rgb_st( struct draw_context *draw, - float *out, - unsigned start, - unsigned count ) -{ - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - unsigned i; - - const ubyte *xyzw = src[0] + start * pitch[0]; - const ubyte *rgba = src[1] + start * pitch[1]; - const ubyte *st = src[2] + start * pitch[2]; - - /* loop over vertex attributes (vertex shader inputs) - */ - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyzw; xyzw += pitch[0]; - /* decode input, encode output. Assume both are float[4] */ - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - } - - { - const float *in = (const float *)rgba; rgba += pitch[1]; - /* decode input, encode output. Assume both are float[4] */ - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = in[3]; - } - - { - const float *in = (const float *)st; st += pitch[2]; - /* decode input, encode output. 
Assume both are float[2] */ - out[8] = in[0]; - out[9] = in[1]; - } - - out += 10; - } -} - static boolean update_shader( struct draw_context *draw ) { const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); @@ -229,70 +225,166 @@ static boolean update_shader( struct draw_context *draw ) draw->pt.hw_vertex_size = vinfo->size * 4; - /* Just trying to figure out how this would work: - */ - if (draw->rasterizer->bypass_vs || - (nr_attrs == 3 && 0 /* some other tests */)) - { -#if 0 - draw->vertex_fetch.pt_fetch = fetch_xyz_rgb_st; -#else - draw->vertex_fetch.pt_fetch = fetch_store_general; -#endif - /*assert(vinfo->size == 10);*/ + draw->vertex_fetch.pt_fetch = fetch_store_general; + return TRUE; +} + + + + +static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + return TRUE; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; return TRUE; + case PIPE_PRIM_LINE_STRIP: + *first = 2; + *incr = 1; + return TRUE; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + *first = 3; + *incr = 1; + return TRUE; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + return TRUE; + default: + *first = 0; + *incr = 1; /* set to one so that count % incr works */ + return FALSE; } - - return FALSE; } static boolean set_prim( struct draw_context *draw, - unsigned prim ) + unsigned prim, + unsigned count ) { assert(!draw->user.elts); - draw->pt.prim = prim; - switch (prim) { case PIPE_PRIM_LINE_LOOP: + if (count > 1024) + return FALSE; + return draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP ); + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + if (count > 1024) + return FALSE; + return draw->render->set_primitive( draw->render, prim ); + case PIPE_PRIM_QUADS: case PIPE_PRIM_QUAD_STRIP: - return FALSE; + return 
draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES ); + default: - draw->render->set_primitive( draw->render, prim ); - return TRUE; + return draw->render->set_primitive( draw->render, prim ); + break; } + + return TRUE; } -boolean -draw_passthrough_arrays(struct draw_context *draw, - unsigned prim, - unsigned start, - unsigned count) + +#define INDEX(i) (start + (i)) +static void pt_draw_arrays( struct draw_context *draw, + unsigned start, + unsigned length ) { - float *hw_verts; + ushort *tmp = NULL; + unsigned i, j; - if (draw_need_pipeline(draw)) - return FALSE; + switch (draw->pt.prim) { + case PIPE_PRIM_LINE_LOOP: + tmp = MALLOC( sizeof(ushort) * (length + 1) ); - if (!set_prim(draw, prim)) - return FALSE; + for (i = 0; i < length; i++) + tmp[i] = INDEX(i); + tmp[length] = 0; - if (!update_shader(draw)) - return FALSE; + draw->render->draw( draw->render, + tmp, + length+1 ); + break; - hw_verts = draw->render->allocate_vertices( draw->render, - draw->pt.hw_vertex_size, - count ); + + case PIPE_PRIM_QUAD_STRIP: + tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) ); + + for (j = i = 0; i + 3 < length; i += 2, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+2); + tmp[j+4] = INDEX(i+0); + tmp[j+5] = INDEX(i+3); + } + + if (j) + draw->render->draw( draw->render, tmp, j ); + break; + + case PIPE_PRIM_QUADS: + tmp = MALLOC( sizeof(int) * (length / 4 * 6) ); + + for (j = i = 0; i + 3 < length; i += 4, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+1); + tmp[j+4] = INDEX(i+2); + tmp[j+5] = INDEX(i+3); + } + + if (j) + draw->render->draw( draw->render, tmp, j ); + break; + + default: + draw->render->draw_arrays( draw->render, + start, + length ); + break; + } + + if (tmp) + FREE(tmp); +} + + + +static boolean do_draw( struct draw_context *draw, + unsigned start, unsigned count ) +{ + float *hw_verts = + draw->render->allocate_vertices( 
draw->render, + (ushort)draw->pt.hw_vertex_size, + (ushort)count ); if (!hw_verts) return FALSE; - /* Single routine to fetch vertices, run shader and emit HW verts. - * Clipping and viewport transformation are done on hardware. + /* Single routine to fetch vertices and emit HW verts. */ draw->vertex_fetch.pt_fetch( draw, hw_verts, @@ -301,9 +393,9 @@ draw_passthrough_arrays(struct draw_context *draw, /* Draw arrays path to avoid re-emitting index list again and * again. */ - draw->render->draw_arrays( draw->render, - start, - count ); + pt_draw_arrays( draw, + 0, + count ); draw->render->release_vertices( draw->render, @@ -314,3 +406,68 @@ draw_passthrough_arrays(struct draw_context *draw, return TRUE; } + +boolean +draw_passthrough_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count) +{ + unsigned i = 0; + unsigned first, incr; + + //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); + + split_prim_inplace(prim, &first, &incr); + + count -= (count - first) % incr; + + debug_printf("%s %d %d %d\n", __FUNCTION__, prim, start, count); + + if (draw_need_pipeline(draw)) + return FALSE; + + debug_printf("%s AAA\n", __FUNCTION__); + + if (!set_prim(draw, prim, count)) + return FALSE; + + /* XXX: need a single value that reflects the most recent call to + * driver->set_primitive: + */ + draw->pt.prim = prim; + + debug_printf("%s BBB\n", __FUNCTION__); + + if (!update_shader(draw)) + return FALSE; + + debug_printf("%s CCC\n", __FUNCTION__); + + /* Chop this up into bite-sized pieces that a driver should be able + * to devour -- problem is we don't have a quick way to query the + * driver on the maximum size for this chunk in the current state. 
+ */ + while (i + first <= count) { + int nr = MIN2( count - i, 1024 ); + + /* snap to prim boundary + */ + nr -= (nr - first) % incr; + + if (!do_draw( draw, start + i, nr )) { + assert(0); + return FALSE; + } + + /* increment allowing for repeated vertices + */ + i += nr - (first - incr); + } + + + debug_printf("%s DONE\n", __FUNCTION__); + return TRUE; +} + + diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 888fa536ea3..cb0277fb6c9 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -169,11 +169,12 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) flushing = TRUE; if (flags >= DRAW_FLUSH_SHADER_QUEUE) { - if (draw->vs.queue_nr) + if (draw->vs.queue_nr) { if (draw->rasterizer->bypass_vs) fetch_and_store(draw); else (*draw->shader_queue_flush)(draw); + } if (flags >= DRAW_FLUSH_PRIM_QUEUE) { if (draw->pq.queue_nr) diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 5e7de905c16..e90f37872a1 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -74,9 +74,11 @@ struct vbuf_render { /** * Notify the renderer of the current primitive when it changes. - * Prim is restricted to TRIANGLES, LINES and POINTS. + * Must succeed for TRIANGLES, LINES and POINTS. Other prims at + * the discretion of the driver, for the benefit of the passthrough + * path. 
*/ - void (*set_primitive)( struct vbuf_render *, unsigned prim ); + boolean (*set_primitive)( struct vbuf_render *, unsigned prim ); /** * DrawElements, note indices are ushort: diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index a996218ce72..24be65bff9d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -316,7 +316,9 @@ void spe_init_func(struct spe_function *p, unsigned code_size) void spe_release_func(struct spe_function *p) { - align_free(p->store); + if (p->store != NULL) { + align_free(p->store); + } p->store = NULL; p->csr = NULL; } @@ -326,8 +328,8 @@ int spe_allocate_available_register(struct spe_function *p) { unsigned i; for (i = 0; i < 128; i++) { - const uint64_t mask = (1ULL << (i % 128)); - const unsigned idx = i / 128; + const uint64_t mask = (1ULL << (i % 64)); + const unsigned idx = i / 64; if ((p->regs[idx] & mask) != 0) { p->regs[idx] &= ~mask; @@ -341,8 +343,8 @@ int spe_allocate_available_register(struct spe_function *p) int spe_allocate_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 128; - const unsigned bit = reg % 128; + const unsigned idx = reg / 64; + const unsigned bit = reg % 64; assert((p->regs[idx] & (1ULL << bit)) != 0); @@ -353,8 +355,8 @@ int spe_allocate_register(struct spe_function *p, int reg) void spe_release_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 128; - const unsigned bit = reg % 128; + const unsigned idx = reg / 64; + const unsigned bit = reg % 64; assert((p->regs[idx] & (1ULL << bit)) == 0); diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 2016c6fb1f7..9b6c2708b62 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -7,9 +7,13 @@ C_SOURCES = \ p_debug.c \ p_tile.c \ p_util.c \ + u_blit.c \ + u_draw_quad.c \ + u_gen_mipmap.c \ u_handle_table.c \ u_hash_table.c \ u_mm.c 
\ + u_simple_shaders.c \ u_snprintf.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 154a3eca8cc..b44f2d5e398 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,9 +6,13 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_tile.c', 'p_util.c', + 'u_blit.c', + 'u_draw_quad.c', + 'u_gen_mipmap.c', 'u_handle_table.c', 'u_hash_table.c', 'u_mm.c', + 'u_simple_shaders.c', 'u_snprintf.c', ]) diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c new file mode 100644 index 00000000000..4b4ab8185f2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_blit.c @@ -0,0 +1,276 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Copy/blit pixel rect between surfaces + * + * @author Brian Paul + */ + + +#include "pipe/p_context.h" +#include "pipe/p_debug.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_shader_tokens.h" + +#include "util/u_draw_quad.h" +#include "util/u_blit.h" +#include "util/u_simple_shaders.h" + + +struct blit_state +{ + struct pipe_context *pipe; + + void *blend; + void *depthstencil; + void *rasterizer; + void *samplers[2]; /* one for linear, one for nearest sampling */ + + /*struct pipe_viewport_state viewport;*/ + struct pipe_sampler_state *vs; + struct pipe_sampler_state *fs; +}; + + +/** + * Create state object for blit. + * Intended to be created once and re-used for many blit() calls. 
+ */ +struct blit_state * +util_create_blit(struct pipe_context *pipe) +{ + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + struct blit_state *ctx; + struct pipe_sampler_state sampler; + + ctx = CALLOC_STRUCT(blit_state); + if (!ctx) + return NULL; + + ctx->pipe = pipe; + + /* we don't use blending, but need to set valid values */ + memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.colormask = PIPE_MASK_RGBA; + ctx->blend = pipe->create_blend_state(pipe, &blend); + + /* depth/stencil/alpha */ + memset(&depthstencil, 0, sizeof(depthstencil)); + ctx->depthstencil = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); + + /* rasterizer */ + memset(&rasterizer, 0, sizeof(rasterizer)); + rasterizer.front_winding = PIPE_WINDING_CW; + rasterizer.cull_mode = PIPE_WINDING_NONE; + rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + /*rasterizer.bypass_vs = 1;*/ + ctx->rasterizer = pipe->create_rasterizer_state(pipe, &rasterizer); + + /* samplers */ + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST; + sampler.normalized_coords = 1; + ctx->samplers[0] = pipe->create_sampler_state(pipe, &sampler); + + sampler.min_img_filter = PIPE_TEX_MIPFILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_MIPFILTER_LINEAR; + ctx->samplers[1] = pipe->create_sampler_state(pipe, &sampler); + + +#if 0 + /* viewport */ + ctx->viewport.scale[0] = 1.0; + ctx->viewport.scale[1] = 1.0; + ctx->viewport.scale[2] = 1.0; + 
ctx->viewport.scale[3] = 1.0; + ctx->viewport.translate[0] = 0.0; + ctx->viewport.translate[1] = 0.0; + ctx->viewport.translate[2] = 0.0; + ctx->viewport.translate[3] = 0.0; +#endif + + /* vertex shader */ + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0 }; + ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, + semantic_indexes); + } + + /* fragment shader */ + ctx->fs = util_make_fragment_tex_shader(pipe); + + return ctx; +} + + +/** + * Destroy a blit context + */ +void +util_destroy_blit(struct blit_state *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->delete_blend_state(pipe, ctx->blend); + pipe->delete_depth_stencil_alpha_state(pipe, ctx->depthstencil); + pipe->delete_rasterizer_state(pipe, ctx->rasterizer); + pipe->delete_sampler_state(pipe, ctx->samplers[0]); + pipe->delete_sampler_state(pipe, ctx->samplers[1]); + + pipe->delete_vs_state(pipe, ctx->vs); + pipe->delete_fs_state(pipe, ctx->fs); + + FREE(ctx); +} + + +/** + * Copy pixel block from src surface to dst surface. + * Overlapping regions are acceptable. + * XXX need some control over blitting Z and/or stencil. 
+ */ +void +util_blit_pixels(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture texTemp, *tex; + struct pipe_surface *texSurf; + struct pipe_framebuffer_state fb; + const int srcW = abs(srcX1 - srcX0); + const int srcH = abs(srcY1 - srcY0); + const int srcLeft = MIN2(srcX0, srcX1); + const int srcTop = MIN2(srcY0, srcY1); + + assert(filter == PIPE_TEX_MIPFILTER_NEAREST || + filter == PIPE_TEX_MIPFILTER_LINEAR); + + if (srcLeft != srcX0) { + /* left-right flip */ + int tmp = dstX0; + dstX0 = dstX1; + dstX1 = tmp; + } + + if (srcTop != srcY0) { + /* up-down flip */ + int tmp = dstY0; + dstY0 = dstY1; + dstY1 = tmp; + } + + /* + * XXX for now we're always creating a temporary texture. + * Strictly speaking that's not always needed. + */ + + /* create temp texture */ + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = src->format; + texTemp.last_level = 0; + texTemp.width[0] = srcW; + texTemp.height[0] = srcH; + texTemp.depth[0] = 1; + texTemp.compressed = 0; + texTemp.cpp = pf_get_bits(src->format) / 8; + + tex = screen->texture_create(screen, &texTemp); + if (!tex) + return; + + texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0); + + /* load temp texture */ + pipe->surface_copy(pipe, FALSE, + texSurf, 0, 0, /* dest */ + src, srcLeft, srcTop, /* src */ + srcW, srcH); /* size */ + + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.num_cbufs = 1; + fb.cbufs[0] = dst; + pipe->set_framebuffer_state(pipe, &fb); + + /* sampler */ + if (filter == PIPE_TEX_MIPFILTER_NEAREST) + pipe->bind_sampler_states(pipe, 1, &ctx->samplers[0]); + else + pipe->bind_sampler_states(pipe, 1, &ctx->samplers[1]); + + /* texture */ + pipe->set_sampler_textures(pipe, 1, &tex); + + /* 
shaders */ + pipe->bind_fs_state(pipe, ctx->fs); + pipe->bind_vs_state(pipe, ctx->vs); + + /* misc state */ + pipe->bind_blend_state(pipe, ctx->blend); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->depthstencil); + pipe->bind_rasterizer_state(pipe, ctx->rasterizer); + + /* draw quad */ + util_draw_texquad(pipe, dstX0, dstY0, dstX1, dstY1, z); + + /* unbind */ + pipe->set_sampler_textures(pipe, 0, NULL); + pipe->bind_sampler_states(pipe, 0, NULL); + + /* free stuff */ + pipe_surface_reference(&texSurf, NULL); + screen->texture_release(screen, &tex); + + /* Note: caller must restore pipe/gallium state at this time */ +} + diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h new file mode 100644 index 00000000000..a349be99ad8 --- /dev/null +++ b/src/gallium/auxiliary/util/u_blit.h @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef U_BLIT_H +#define U_BLIT_H + + +struct pipe_context; +struct pipe_surface; + + +struct blit_state; + + +extern struct blit_state * +util_create_blit(struct pipe_context *pipe); + + +extern void +util_destroy_blit(struct blit_state *ctx); + + + +extern void +util_blit_pixels(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter); + + +#endif diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c new file mode 100644 index 00000000000..79a69de633d --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -0,0 +1,112 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_draw_quad.h" + + +/** + * Draw screen-aligned textured quad. + */ +void +util_draw_texquad(struct pipe_context *pipe, + float x0, float y0, float x1, float y1, float z) +{ + struct pipe_buffer *vbuf; + struct pipe_vertex_buffer vbuffer; + struct pipe_vertex_element velement; + uint numAttribs = 2, vertexBytes, i, j; + float *v; + + vertexBytes = 4 * (4 * numAttribs * sizeof(float)); + + /* XXX create one-time */ + vbuf = pipe->winsys->buffer_create(pipe->winsys, 32, + PIPE_BUFFER_USAGE_VERTEX, vertexBytes); + assert(vbuf); + + v = (float *) pipe->winsys->buffer_map(pipe->winsys, vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* + * Load vertex buffer + */ + for (i = j = 0; i < 4; i++) { + v[j + 2] = z; /* z */ + v[j + 3] = 1.0; /* w */ + v[j + 6] = 0.0; /* r */ + v[j + 7] = 1.0; /* q */ + j += 8; + } + + v[0] = x0; + v[1] = y0; + v[4] = 0.0; /*s*/ + v[5] = 0.0; /*t*/ + + v[8] = x1; + v[9] = y0; + v[12] = 1.0; + v[13] = 0.0; + + v[16] = x1; + v[17] = y1; + v[20] = 1.0; + v[21] = 1.0; + + v[24] = x0; + v[25] = y1; + v[28] = 0.0; + v[29] = 1.0; + + pipe->winsys->buffer_unmap(pipe->winsys, vbuf); + + /* tell pipe about the vertex buffer */ + vbuffer.buffer = vbuf; + vbuffer.pitch = numAttribs * 4 * sizeof(float); /* vertex size */ + vbuffer.buffer_offset = 0; + 
pipe->set_vertex_buffer(pipe, 0, &vbuffer); + + /* tell pipe about the vertex attributes */ + for (i = 0; i < numAttribs; i++) { + velement.src_offset = i * 4 * sizeof(float); + velement.vertex_buffer_index = 0; + velement.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + velement.nr_components = 4; + pipe->set_vertex_element(pipe, i, &velement); + } + + /* draw */ + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, 0, 4); + + /* XXX: do one-time */ + pipe_buffer_reference(pipe->winsys, &vbuf, NULL); +} diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h new file mode 100644 index 00000000000..a97f55d2efd --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_DRAWQUAD_H +#define U_DRAWQUAD_H + + +extern void +util_draw_texquad(struct pipe_context *pipe, + float x0, float y0, float x1, float y1, float z); + + +#endif diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c new file mode 100644 index 00000000000..e18f8ab72ac --- /dev/null +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -0,0 +1,877 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Mipmap generation utility + * + * @author Brian Paul + */ + + +#include "pipe/p_context.h" +#include "pipe/p_debug.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_shader_tokens.h" + +#include "util/u_draw_quad.h" +#include "util/u_gen_mipmap.h" +#include "util/u_simple_shaders.h" + +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" + + +struct gen_mipmap_state +{ + struct pipe_context *pipe; + + void *blend; + void *depthstencil; + void *rasterizer; + /*struct pipe_viewport_state viewport;*/ + struct pipe_sampler_state *vs; + struct pipe_sampler_state *fs; +}; + + + +enum dtype +{ + UBYTE, + UBYTE_3_3_2, + USHORT, + USHORT_4_4_4_4, + USHORT_5_6_5, + USHORT_1_5_5_5_REV, + UINT, + FLOAT, + HALF_FLOAT +}; + + +typedef ushort half_float; + + +#if 0 +extern half_float +float_to_half(float f); + +extern float +half_to_float(half_float h); +#endif + + +/** + * Average together two rows of a source image to produce a single new + * row in the dest image. It's legal for the two source rows to point + * to the same data. The source width must be equal to either the + * dest width or two times the dest width. + * \param datatype GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT, GL_FLOAT, etc. + * \param comps number of components per pixel (1..4) + */ +static void +do_row(enum dtype datatype, uint comps, int srcWidth, + const void *srcRowA, const void *srcRowB, + int dstWidth, void *dstRow) +{ + const uint k0 = (srcWidth == dstWidth) ? 
0 : 1; + const uint colStride = (srcWidth == dstWidth) ? 1 : 2; + + assert(comps >= 1); + assert(comps <= 4); + + /* This assertion is no longer valid with non-power-of-2 textures + assert(srcWidth == dstWidth || srcWidth == 2 * dstWidth); + */ + + if (datatype == UBYTE && comps == 4) { + uint i, j, k; + const ubyte(*rowA)[4] = (const ubyte(*)[4]) srcRowA; + const ubyte(*rowB)[4] = (const ubyte(*)[4]) srcRowB; + ubyte(*dst)[4] = (ubyte(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4; + } + } + else if (datatype == UBYTE && comps == 3) { + uint i, j, k; + const ubyte(*rowA)[3] = (const ubyte(*)[3]) srcRowA; + const ubyte(*rowB)[3] = (const ubyte(*)[3]) srcRowB; + ubyte(*dst)[3] = (ubyte(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + } + } + else if (datatype == UBYTE && comps == 2) { + uint i, j, k; + const ubyte(*rowA)[2] = (const ubyte(*)[2]) srcRowA; + const ubyte(*rowB)[2] = (const ubyte(*)[2]) srcRowB; + ubyte(*dst)[2] = (ubyte(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) >> 2; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) >> 2; + } + } + else if (datatype == UBYTE && comps == 1) { + uint i, j, k; + const ubyte *rowA = (const ubyte *) srcRowA; + const ubyte *rowB = (const ubyte *) srcRowB; + ubyte *dst = (ubyte *) dstRow; 
+ for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) >> 2; + } + } + + else if (datatype == USHORT && comps == 4) { + uint i, j, k; + const ushort(*rowA)[4] = (const ushort(*)[4]) srcRowA; + const ushort(*rowB)[4] = (const ushort(*)[4]) srcRowB; + ushort(*dst)[4] = (ushort(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4; + } + } + else if (datatype == USHORT && comps == 3) { + uint i, j, k; + const ushort(*rowA)[3] = (const ushort(*)[3]) srcRowA; + const ushort(*rowB)[3] = (const ushort(*)[3]) srcRowB; + ushort(*dst)[3] = (ushort(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + } + } + else if (datatype == USHORT && comps == 2) { + uint i, j, k; + const ushort(*rowA)[2] = (const ushort(*)[2]) srcRowA; + const ushort(*rowB)[2] = (const ushort(*)[2]) srcRowB; + ushort(*dst)[2] = (ushort(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + } + } + else if (datatype == USHORT && comps == 1) { + uint i, j, k; + const ushort *rowA = (const ushort *) srcRowA; + const ushort *rowB = (const ushort *) srcRowB; + ushort *dst = (ushort *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += 
colStride, k += colStride) { + dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4; + } + } + + else if (datatype == FLOAT && comps == 4) { + uint i, j, k; + const float(*rowA)[4] = (const float(*)[4]) srcRowA; + const float(*rowB)[4] = (const float(*)[4]) srcRowB; + float(*dst)[4] = (float(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + + rowB[j][0] + rowB[k][0]) * 0.25F; + dst[i][1] = (rowA[j][1] + rowA[k][1] + + rowB[j][1] + rowB[k][1]) * 0.25F; + dst[i][2] = (rowA[j][2] + rowA[k][2] + + rowB[j][2] + rowB[k][2]) * 0.25F; + dst[i][3] = (rowA[j][3] + rowA[k][3] + + rowB[j][3] + rowB[k][3]) * 0.25F; + } + } + else if (datatype == FLOAT && comps == 3) { + uint i, j, k; + const float(*rowA)[3] = (const float(*)[3]) srcRowA; + const float(*rowB)[3] = (const float(*)[3]) srcRowB; + float(*dst)[3] = (float(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + + rowB[j][0] + rowB[k][0]) * 0.25F; + dst[i][1] = (rowA[j][1] + rowA[k][1] + + rowB[j][1] + rowB[k][1]) * 0.25F; + dst[i][2] = (rowA[j][2] + rowA[k][2] + + rowB[j][2] + rowB[k][2]) * 0.25F; + } + } + else if (datatype == FLOAT && comps == 2) { + uint i, j, k; + const float(*rowA)[2] = (const float(*)[2]) srcRowA; + const float(*rowB)[2] = (const float(*)[2]) srcRowB; + float(*dst)[2] = (float(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + + rowB[j][0] + rowB[k][0]) * 0.25F; + dst[i][1] = (rowA[j][1] + rowA[k][1] + + rowB[j][1] + rowB[k][1]) * 0.25F; + } + } + else if (datatype == FLOAT && comps == 1) { + uint i, j, k; + const float *rowA = (const float *) srcRowA; + const float *rowB = (const float *) srcRowB; + float *dst = (float *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + 
dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) * 0.25F; + } + } + +#if 0 + else if (datatype == HALF_FLOAT && comps == 4) { + uint i, j, k, comp; + const half_float(*rowA)[4] = (const half_float(*)[4]) srcRowA; + const half_float(*rowB)[4] = (const half_float(*)[4]) srcRowB; + half_float(*dst)[4] = (half_float(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + for (comp = 0; comp < 4; comp++) { + float aj, ak, bj, bk; + aj = half_to_float(rowA[j][comp]); + ak = half_to_float(rowA[k][comp]); + bj = half_to_float(rowB[j][comp]); + bk = half_to_float(rowB[k][comp]); + dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F); + } + } + } + else if (datatype == HALF_FLOAT && comps == 3) { + uint i, j, k, comp; + const half_float(*rowA)[3] = (const half_float(*)[3]) srcRowA; + const half_float(*rowB)[3] = (const half_float(*)[3]) srcRowB; + half_float(*dst)[3] = (half_float(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + for (comp = 0; comp < 3; comp++) { + float aj, ak, bj, bk; + aj = half_to_float(rowA[j][comp]); + ak = half_to_float(rowA[k][comp]); + bj = half_to_float(rowB[j][comp]); + bk = half_to_float(rowB[k][comp]); + dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F); + } + } + } + else if (datatype == HALF_FLOAT && comps == 2) { + uint i, j, k, comp; + const half_float(*rowA)[2] = (const half_float(*)[2]) srcRowA; + const half_float(*rowB)[2] = (const half_float(*)[2]) srcRowB; + half_float(*dst)[2] = (half_float(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + for (comp = 0; comp < 2; comp++) { + float aj, ak, bj, bk; + aj = half_to_float(rowA[j][comp]); + ak = half_to_float(rowA[k][comp]); + bj = half_to_float(rowB[j][comp]); + bk = half_to_float(rowB[k][comp]); + dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F); + } + } + } + else if (datatype == HALF_FLOAT && comps == 1) { + 
uint i, j, k; + const half_float *rowA = (const half_float *) srcRowA; + const half_float *rowB = (const half_float *) srcRowB; + half_float *dst = (half_float *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + float aj, ak, bj, bk; + aj = half_to_float(rowA[j]); + ak = half_to_float(rowA[k]); + bj = half_to_float(rowB[j]); + bk = half_to_float(rowB[k]); + dst[i] = float_to_half((aj + ak + bj + bk) * 0.25F); + } + } +#endif + + else if (datatype == UINT && comps == 1) { + uint i, j, k; + const uint *rowA = (const uint *) srcRowA; + const uint *rowB = (const uint *) srcRowB; + uint *dst = (uint *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4; + } + } + + else if (datatype == USHORT_5_6_5 && comps == 3) { + uint i, j, k; + const ushort *rowA = (const ushort *) srcRowA; + const ushort *rowB = (const ushort *) srcRowB; + ushort *dst = (ushort *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0x1f; + const int rowAg0 = (rowA[j] >> 5) & 0x3f; + const int rowAg1 = (rowA[k] >> 5) & 0x3f; + const int rowBg0 = (rowB[j] >> 5) & 0x3f; + const int rowBg1 = (rowB[k] >> 5) & 0x3f; + const int rowAb0 = (rowA[j] >> 11) & 0x1f; + const int rowAb1 = (rowA[k] >> 11) & 0x1f; + const int rowBb0 = (rowB[j] >> 11) & 0x1f; + const int rowBb1 = (rowB[k] >> 11) & 0x1f; + const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + dst[i] = (blue << 11) | (green << 5) | red; + } + } + else if (datatype == USHORT_4_4_4_4 && comps == 4) { + uint i, j, k; + const ushort *rowA = (const ushort *) srcRowA; + const ushort *rowB = 
(const ushort *) srcRowB; + ushort *dst = (ushort *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0xf; + const int rowAr1 = rowA[k] & 0xf; + const int rowBr0 = rowB[j] & 0xf; + const int rowBr1 = rowB[k] & 0xf; + const int rowAg0 = (rowA[j] >> 4) & 0xf; + const int rowAg1 = (rowA[k] >> 4) & 0xf; + const int rowBg0 = (rowB[j] >> 4) & 0xf; + const int rowBg1 = (rowB[k] >> 4) & 0xf; + const int rowAb0 = (rowA[j] >> 8) & 0xf; + const int rowAb1 = (rowA[k] >> 8) & 0xf; + const int rowBb0 = (rowB[j] >> 8) & 0xf; + const int rowBb1 = (rowB[k] >> 8) & 0xf; + const int rowAa0 = (rowA[j] >> 12) & 0xf; + const int rowAa1 = (rowA[k] >> 12) & 0xf; + const int rowBa0 = (rowB[j] >> 12) & 0xf; + const int rowBa1 = (rowB[k] >> 12) & 0xf; + const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2; + dst[i] = (alpha << 12) | (blue << 8) | (green << 4) | red; + } + } + else if (datatype == USHORT_1_5_5_5_REV && comps == 4) { + uint i, j, k; + const ushort *rowA = (const ushort *) srcRowA; + const ushort *rowB = (const ushort *) srcRowB; + ushort *dst = (ushort *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0xf; + const int rowAg0 = (rowA[j] >> 5) & 0x1f; + const int rowAg1 = (rowA[k] >> 5) & 0x1f; + const int rowBg0 = (rowB[j] >> 5) & 0x1f; + const int rowBg1 = (rowB[k] >> 5) & 0x1f; + const int rowAb0 = (rowA[j] >> 10) & 0x1f; + const int rowAb1 = (rowA[k] >> 10) & 0x1f; + const int rowBb0 = (rowB[j] >> 10) & 0x1f; + const int rowBb1 = (rowB[k] >> 10) & 0x1f; + const int rowAa0 = (rowA[j] >> 15) & 0x1; + const int rowAa1 = (rowA[k] 
>> 15) & 0x1; + const int rowBa0 = (rowB[j] >> 15) & 0x1; + const int rowBa1 = (rowB[k] >> 15) & 0x1; + const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2; + dst[i] = (alpha << 15) | (blue << 10) | (green << 5) | red; + } + } + else if (datatype == UBYTE_3_3_2 && comps == 3) { + uint i, j, k; + const ubyte *rowA = (const ubyte *) srcRowA; + const ubyte *rowB = (const ubyte *) srcRowB; + ubyte *dst = (ubyte *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x3; + const int rowAr1 = rowA[k] & 0x3; + const int rowBr0 = rowB[j] & 0x3; + const int rowBr1 = rowB[k] & 0x3; + const int rowAg0 = (rowA[j] >> 2) & 0x7; + const int rowAg1 = (rowA[k] >> 2) & 0x7; + const int rowBg0 = (rowB[j] >> 2) & 0x7; + const int rowBg1 = (rowB[k] >> 2) & 0x7; + const int rowAb0 = (rowA[j] >> 5) & 0x7; + const int rowAb1 = (rowA[k] >> 5) & 0x7; + const int rowBb0 = (rowB[j] >> 5) & 0x7; + const int rowBb1 = (rowB[k] >> 5) & 0x7; + const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + dst[i] = (blue << 5) | (green << 2) | red; + } + } + else { + debug_printf("bad format in do_row()"); + } +} + + +static void +format_to_type_comps(enum pipe_format pformat, + enum dtype *datatype, uint *comps) +{ + switch (pformat) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + *datatype = UBYTE; + *comps = 4; + return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + *datatype = USHORT_1_5_5_5_REV; + *comps = 4; + return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + *datatype = USHORT_4_4_4_4; + *comps = 4; + return; + case PIPE_FORMAT_R5G6B5_UNORM: + *datatype = USHORT_5_6_5; + *comps = 3; + return; + case 
PIPE_FORMAT_U_L8: + case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_U_I8: + *datatype = UBYTE; + *comps = 1; + return; + case PIPE_FORMAT_U_A8_L8: + *datatype = UBYTE; + *comps = 2; + return; + default: + assert(0); + } +} + + +static void +reduce_1d(enum pipe_format pformat, + int srcWidth, const ubyte *srcPtr, + int dstWidth, ubyte *dstPtr) +{ + enum dtype datatype; + uint comps; + + format_to_type_comps(pformat, &datatype, &comps); + + /* we just duplicate the input row, kind of hack, saves code */ + do_row(datatype, comps, + srcWidth, srcPtr, srcPtr, + dstWidth, dstPtr); +} + + +/** + * Strides are in bytes. If zero, it'll be computed as width * bpp. + */ +static void +reduce_2d(enum pipe_format pformat, + int srcWidth, int srcHeight, + int srcRowStride, const ubyte *srcPtr, + int dstWidth, int dstHeight, + int dstRowStride, ubyte *dstPtr) +{ + enum dtype datatype; + uint comps; + const int bpt = pf_get_size(pformat); + const ubyte *srcA, *srcB; + ubyte *dst; + int row; + + format_to_type_comps(pformat, &datatype, &comps); + + if (!srcRowStride) + srcRowStride = bpt * srcWidth; + + if (!dstRowStride) + dstRowStride = bpt * dstWidth; + + /* Compute src and dst pointers */ + srcA = srcPtr; + if (srcHeight > 1) + srcB = srcA + srcRowStride; + else + srcB = srcA; + dst = dstPtr; + + for (row = 0; row < dstHeight; row++) { + do_row(datatype, comps, + srcWidth, srcA, srcB, + dstWidth, dst); + srcA += 2 * srcRowStride; + srcB += 2 * srcRowStride; + dst += dstRowStride; + } +} + + +static void +make_1d_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_winsys *winsys = pipe->winsys; + const uint zslice = 0; + uint dstLevel; + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + struct pipe_surface *srcSurf, *dstSurf; + void *srcMap, *dstMap; + + srcSurf = 
screen->get_tex_surface(screen, pt, face, srcLevel, zslice); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + + srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, + PIPE_BUFFER_USAGE_CPU_READ) + + srcSurf->offset); + dstMap = ((ubyte *) winsys->buffer_map(winsys, dstSurf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE) + + dstSurf->offset); + + reduce_1d(pt->format, + srcSurf->width, srcMap, + dstSurf->width, dstMap); + + winsys->buffer_unmap(winsys, srcSurf->buffer); + winsys->buffer_unmap(winsys, dstSurf->buffer); + + pipe_surface_reference(&srcSurf, NULL); + pipe_surface_reference(&dstSurf, NULL); + } +} + + +static void +make_2d_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_winsys *winsys = pipe->winsys; + const uint zslice = 0; + uint dstLevel; + const int bpt = pf_get_size(pt->format); + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + struct pipe_surface *srcSurf, *dstSurf; + ubyte *srcMap, *dstMap; + + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + + srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, + PIPE_BUFFER_USAGE_CPU_READ) + + srcSurf->offset); + dstMap = ((ubyte *) winsys->buffer_map(winsys, dstSurf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE) + + dstSurf->offset); + + reduce_2d(pt->format, + srcSurf->width, srcSurf->height, + srcSurf->pitch * bpt, srcMap, + dstSurf->width, dstSurf->height, + dstSurf->pitch * bpt, dstMap); + + winsys->buffer_unmap(winsys, srcSurf->buffer); + winsys->buffer_unmap(winsys, dstSurf->buffer); + + pipe_surface_reference(&srcSurf, NULL); + pipe_surface_reference(&dstSurf, NULL); + } +} + + +static void +make_3d_mipmap(struct gen_mipmap_state *ctx, + struct 
pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ +} + + +static void +fallback_gen_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ + switch (pt->target) { + case PIPE_TEXTURE_1D: + make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel); + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel); + break; + case PIPE_TEXTURE_3D: + make_3d_mipmap(ctx, pt, face, baseLevel, lastLevel); + break; + default: + assert(0); + } +} + + +/** + * Create a mipmap generation context. + * The idea is to create one of these and re-use it each time we need to + * generate a mipmap. + */ +struct gen_mipmap_state * +util_create_gen_mipmap(struct pipe_context *pipe) +{ + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + struct gen_mipmap_state *ctx; + + ctx = CALLOC_STRUCT(gen_mipmap_state); + if (!ctx) + return NULL; + + ctx->pipe = pipe; + + /* we don't use blending, but need to set valid values */ + memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.colormask = PIPE_MASK_RGBA; + ctx->blend = pipe->create_blend_state(pipe, &blend); + + /* depth/stencil/alpha */ + memset(&depthstencil, 0, sizeof(depthstencil)); + ctx->depthstencil = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); + + /* rasterizer */ + memset(&rasterizer, 0, sizeof(rasterizer)); + rasterizer.front_winding = PIPE_WINDING_CW; + rasterizer.cull_mode = PIPE_WINDING_NONE; + rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + //rasterizer.bypass_vs = 1; + ctx->rasterizer = pipe->create_rasterizer_state(pipe, &rasterizer); + +#if 0 + /* viewport */ + ctx->viewport.scale[0] = 1.0; + ctx->viewport.scale[1] = 1.0; + 
ctx->viewport.scale[2] = 1.0; + ctx->viewport.scale[3] = 1.0; + ctx->viewport.translate[0] = 0.0; + ctx->viewport.translate[1] = 0.0; + ctx->viewport.translate[2] = 0.0; + ctx->viewport.translate[3] = 0.0; +#endif + + /* vertex shader */ + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0 }; + ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, + semantic_indexes); + } + + /* fragment shader */ + ctx->fs = util_make_fragment_tex_shader(pipe); + + return ctx; +} + + +/** + * Destroy a mipmap generation context + */ +void +util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->delete_blend_state(pipe, ctx->blend); + pipe->delete_depth_stencil_alpha_state(pipe, ctx->depthstencil); + pipe->delete_rasterizer_state(pipe, ctx->rasterizer); + pipe->delete_vs_state(pipe, ctx->vs); + pipe->delete_fs_state(pipe, ctx->fs); + + FREE(ctx); +} + + +#if 0 +static void +simple_viewport(struct pipe_context *pipe, uint width, uint height) +{ + struct pipe_viewport_state vp; + + vp.scale[0] = 0.5 * width; + vp.scale[1] = -0.5 * height; + vp.scale[2] = 1.0; + vp.scale[3] = 1.0; + vp.translate[0] = 0.5 * width; + vp.translate[1] = 0.5 * height; + vp.translate[2] = 0.0; + vp.translate[3] = 0.0; + + pipe->set_viewport_state(pipe, &vp); +} +#endif + + +/** + * Generate mipmap images. It's assumed all needed texture memory is + * already allocated. 
+ * + * \param pt the texture to generate mipmap levels for + * \param face which cube face to generate mipmaps for (0 for non-cube maps) + * \param baseLevel the first mipmap level to use as a src + * \param lastLevel the last mipmap level to generate + */ +void +util_gen_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_framebuffer_state fb; + struct pipe_sampler_state sampler; + void *sampler_cso; + uint dstLevel; + uint zslice = 0; + + /* check if we can render in the texture's format */ + if (!screen->is_format_supported(screen, pt->format, PIPE_SURFACE)) { + fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel); + return; + } + + /* init framebuffer state */ + memset(&fb, 0, sizeof(fb)); + fb.num_cbufs = 1; + + /* sampler state */ + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + + /* bind our state */ + pipe->bind_blend_state(pipe, ctx->blend); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->depthstencil); + pipe->bind_rasterizer_state(pipe, ctx->rasterizer); + pipe->bind_vs_state(pipe, ctx->vs); + pipe->bind_fs_state(pipe, ctx->fs); +#if 0 + pipe->set_viewport_state(pipe, &ctx->viewport); +#endif + + /* + * XXX for small mipmap levels, it may be faster to use the software + * fallback path... 
+ */ + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + + /* + * Setup framebuffer / dest surface + */ + fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + pipe->set_framebuffer_state(pipe, &fb); + + /* + * Setup sampler state + * Note: we should only have to set the min/max LOD clamps to ensure + * we grab texels from the right mipmap level. But some hardware + * has trouble with min clamping so we also set the lod_bias to + * try to work around that. + */ + sampler.min_lod = sampler.max_lod = (float) srcLevel; + sampler.lod_bias = (float) srcLevel; + sampler_cso = pipe->create_sampler_state(pipe, &sampler); + pipe->bind_sampler_states(pipe, 1, &sampler_cso); + +#if 0 + simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); +#endif + + pipe->set_sampler_textures(pipe, 1, &pt); + + /* quad coords in window coords (bypassing clipping, viewport mapping) */ + util_draw_texquad(pipe, + 0.0F, 0.0F, /* x0, y0 */ + (float) pt->width[dstLevel], /* x1 */ + (float) pt->height[dstLevel], /* y1 */ + 0.0F); /* z */ + + + pipe->flush(pipe, PIPE_FLUSH_WAIT); + + /*pipe->texture_update(pipe, pt); not really needed */ + + pipe->delete_sampler_state(pipe, sampler_cso); + } + + /* Note: caller must restore pipe/gallium state at this time */ +} diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h new file mode 100644 index 00000000000..80496140a24 --- /dev/null +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -0,0 +1,52 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef U_GENMIPMAP_H +#define U_GENMIPMAP_H + +#include "pipe/p_state.h" + + +struct gen_mipmap_state; + + +extern struct gen_mipmap_state * +util_create_gen_mipmap(struct pipe_context *pipe); + + +extern void +util_destroy_gen_mipmap(struct gen_mipmap_state *ctx); + + + +extern void +util_gen_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel); + + +#endif diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index d25872972aa..0bfb9e1b4ae 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -226,9 +226,13 @@ handle_table_remove(struct handle_table *ht, index = handle - 1; object = ht->objects[index]; - assert(object); + if(!object) { + /* XXX: this warning may be noisy for legitimate use -- remove later */ + debug_warning("removing empty handle"); + return; + } - if(object && ht->destroy) + if(ht->destroy) ht->destroy(object); ht->objects[index] = NULL; @@ -237,6 +241,28 @@ handle_table_remove(struct handle_table *ht, } +unsigned +handle_table_get_next_handle(struct handle_table *ht, + unsigned handle) +{ + unsigned index; + + for(index = handle; index < ht->size; ++index) { /* handle h maps to slot h - 1, so slot `handle` is the first one after it */ + if(ht->objects[index]) /* occupied slot: report its handle */ + return index + 1; + } + + return 0; /* no occupied slot left */ +} + + +unsigned +handle_table_get_first_handle(struct handle_table *ht) +{ + return handle_table_get_next_handle(ht, 0); +} + + void handle_table_destroy(struct handle_table *ht) { diff --git a/src/gallium/auxiliary/util/u_handle_table.h b/src/gallium/auxiliary/util/u_handle_table.h index a2f1f604ade..d080135c9f4 100644 --- a/src/gallium/auxiliary/util/u_handle_table.h +++ b/src/gallium/auxiliary/util/u_handle_table.h @@ -100,6 +100,15 @@ void handle_table_destroy(struct handle_table *ht); +unsigned +handle_table_get_first_handle(struct handle_table *ht); + + +unsigned 
+handle_table_get_next_handle(struct handle_table *ht, + unsigned handle); + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c index ac2cb1b540d..f3f16a8d94f 100644 --- a/src/gallium/auxiliary/util/u_hash_table.c +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -187,6 +187,28 @@ hash_table_remove(struct hash_table *ht, } +enum pipe_error +hash_table_foreach(struct hash_table *ht, + enum pipe_error (*callback)(void *key, void *value, void *data), + void *data) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + enum pipe_error result; + + iter = cso_hash_first_node(ht->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_iter_data(iter); + result = callback(item->key, item->value, data); + if(result != PIPE_OK) + return result; + iter = cso_hash_iter_next(iter); + } + + return PIPE_OK; +} + + void hash_table_destroy(struct hash_table *ht) { @@ -196,4 +218,3 @@ hash_table_destroy(struct hash_table *ht) FREE(ht); } - diff --git a/src/gallium/auxiliary/util/u_hash_table.h b/src/gallium/auxiliary/util/u_hash_table.h index d941f2c6b16..1583bd7548c 100644 --- a/src/gallium/auxiliary/util/u_hash_table.h +++ b/src/gallium/auxiliary/util/u_hash_table.h @@ -75,6 +75,11 @@ hash_table_remove(struct hash_table *ht, void *key); +enum pipe_error +hash_table_foreach(struct hash_table *ht, + enum pipe_error (*callback)(void *key, void *value, void *data), + void *data); + void hash_table_destroy(struct hash_table *ht); diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h new file mode 100644 index 00000000000..cd13823985e --- /dev/null +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -0,0 +1,133 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Functions to produce packed colors/Z from floats. + */ + + +#ifndef U_PACK_COLOR_H +#define U_PACK_COLOR_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" + + +/** + * Note rgba outside [0,1] will be clamped for int pixel formats. 
+ */ +static INLINE void +util_pack_color(const float rgba[4], enum pipe_format format, void *dest) +{ + ubyte r = 0, g = 0, b = 0, a = 0; /* defined even if the conversion below is skipped */ + + if (pf_size_x(format) <= 8) { + /* format uses 8-bit components or less */ + UNCLAMPED_FLOAT_TO_UBYTE(r, rgba[0]); + UNCLAMPED_FLOAT_TO_UBYTE(g, rgba[1]); + UNCLAMPED_FLOAT_TO_UBYTE(b, rgba[2]); + UNCLAMPED_FLOAT_TO_UBYTE(a, rgba[3]); + } + + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + { + uint *d = (uint *) dest; + *d = ((uint) r << 24) | (g << 16) | (b << 8) | a; /* cast: a promoted ubyte shifted into bit 31 would overflow int */ + } + return; + case PIPE_FORMAT_A8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = ((uint) a << 24) | (r << 16) | (g << 8) | b; + } + return; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + uint *d = (uint *) dest; + *d = ((uint) b << 24) | (g << 16) | (r << 8) | a; + } + return; + case PIPE_FORMAT_R5G6B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + } + return; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *d = (float *) dest; + d[0] = rgba[0]; + d[1] = rgba[1]; + d[2] = rgba[2]; + d[3] = rgba[3]; + } + return; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + float *d = (float *) dest; + d[0] = rgba[0]; + d[1] = rgba[1]; + d[2] = rgba[2]; + } + return; + /* XXX lots more cases to add */ + default: + debug_printf("gallium: unhandled format in util_pack_color()"); + } +} + + +/** + * Note: it's assumed that z is in [0,1] + */ +static INLINE uint +util_pack_z(enum pipe_format format, double z) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return (uint) (z * 0xffff); + case PIPE_FORMAT_Z32_UNORM: + /* special-case to avoid overflow */ + if (z == 1.0) + return 0xffffffff; + else + return (uint) (z * 0xffffffff); + case PIPE_FORMAT_S8Z24_UNORM: + return (uint) (z * 0xffffff); + case PIPE_FORMAT_Z24S8_UNORM: + return ((uint) (z * 0xffffff)) << 8; + default: + debug_printf("gallium: unhandled format in util_pack_z()"); + return 0; + } +} + + +#endif /* U_PACK_COLOR_H */ diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c 
b/src/gallium/auxiliary/util/u_simple_shaders.c new file mode 100644 index 00000000000..88e2ab05bd0 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -0,0 +1,263 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Simple vertex/fragment shader generators. 
+ * + * @author Brian Paul + */ + + +#include "pipe/p_context.h" +#include "pipe/p_debug.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_shader_tokens.h" + +#include "util/u_simple_shaders.h" + +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" + + + +/** + * Make simple vertex pass-through shader: one MOV OUT[i], IN[i] per attribute, then END. + */ +void * +util_make_vertex_passthrough_shader(struct pipe_context *pipe, + uint num_attribs, + const uint *semantic_names, + const uint *semantic_indexes) +{ + uint maxTokens = 100; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = TGSI_PROCESSOR_VERTEX; + uint ti, i; + struct pipe_shader_state shader; + + tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare inputs */ + for (i = 0; i < num_attribs; i++) { + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + /* + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = i; /* declare IN[i], the register the MOV below reads */ + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + } + + /* declare outputs */ + for (i = 0; i < num_attribs; i++) { + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + 
decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = i; /* declare OUT[i], the register the MOV below writes */ + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + } + + /* emit MOV instructions */ + for (i = 0; i < num_attribs; i++) { + /* MOVE out[i], in[i]; */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + } + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader.tokens = tokens; + return pipe->create_vs_state(pipe, &shader); +} + + + + +/** + * Make simple fragment texture shader: + * TEX OUT[0], IN[0], SAMP[0], 2D; + * END; + */ +void * +util_make_fragment_tex_shader(struct pipe_context *pipe) +{ + uint maxTokens = 100; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = TGSI_PROCESSOR_FRAGMENT; + uint ti; + struct pipe_shader_state shader; + + tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare TEX[0] input */ + decl 
= tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + /* XXX this could be linear... */ + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare color[0] output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* TEX instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + 
inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader.tokens = tokens; + return pipe->create_fs_state(pipe, &shader); +} + diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h new file mode 100644 index 00000000000..3ef4f288018 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -0,0 +1,52 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef U_SIMPLE_SHADERS_H +#define U_SIMPLE_SHADERS_H + + +#include "pipe/p_compiler.h" + + +struct pipe_context; + + +extern void * +util_make_vertex_passthrough_shader(struct pipe_context *pipe, + uint num_attribs, + const uint *semantic_names, + const uint *semantic_indexes); + + +extern void * +util_make_fragment_tex_shader(struct pipe_context *pipe); + + +#endif + + diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 9a4004535ea..fe93fd8e1a2 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -105,6 +105,16 @@ /** + */ +struct cell_command_depth_stencil_alpha_test { + uint64_t base; /**< Effective address of code start. */ + unsigned size; /**< Size in bytes of test code. */ + unsigned read_depth; /**< Flag: should depth be read? */ + unsigned read_stencil; /**< Flag: should stencil be read? */ +}; + + +/** * Tell SPUs about the framebuffer size, location */ struct cell_command_framebuffer diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index d38fa6ce073..0389a9554cf 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -27,6 +27,7 @@ SOURCES = \ cell_flush.c \ cell_state_derived.c \ cell_state_emit.c \ + cell_state_per_fragment.c \ cell_state_shader.c \ cell_pipe_state.c \ cell_screen.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index b221424323f..9e79db0acef 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -57,16 +57,37 @@ struct cell_fragment_shader_state }; +struct cell_blend_state { + struct pipe_blend_state base; + + /** + * Generated code to perform alpha blending + */ + struct spe_function code; +}; + + +struct cell_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state 
base; + + /** + * Generated code to perform alpha, stencil, and depth testing on the SPE + */ + struct spe_function code; + +}; + + struct cell_context { struct pipe_context pipe; struct cell_winsys *winsys; - const struct pipe_blend_state *blend; + const struct cell_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; uint num_samplers; - const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct cell_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; const struct cell_vertex_shader_state *vs; const struct cell_fragment_shader_state *fs; diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 025ed3bbbfe..c880760e4bd 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -36,6 +36,7 @@ #include "cell_context.h" #include "cell_state.h" #include "cell_texture.h" +#include "cell_state_per_fragment.h" @@ -43,7 +44,12 @@ static void * cell_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { - return mem_dup(blend, sizeof(*blend)); + struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state)); + + (void) memcpy(cb, blend, sizeof(*blend)); + cb->code.store = NULL; + + return cb; } @@ -54,7 +60,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend) draw_flush(cell->draw); - cell->blend = (const struct pipe_blend_state *)blend; + cell->blend = (const struct cell_blend_state *)blend; cell->dirty |= CELL_NEW_BLEND; } @@ -63,7 +69,10 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend) static void cell_delete_blend_state(struct pipe_context *pipe, void *blend) { - FREE(blend); + struct cell_blend_state *cb = (struct cell_blend_state *) blend; + + spe_release_func(& cb->code); + FREE(cb); } @@ -87,7 +96,13 @@ static void * cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, const struct 
pipe_depth_stencil_alpha_state *depth_stencil) { - return mem_dup(depth_stencil, sizeof(*depth_stencil)); + struct cell_depth_stencil_alpha_state *cdsa = + MALLOC(sizeof(struct cell_depth_stencil_alpha_state)); + + (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil)); + cdsa->code.store = NULL; + + return cdsa; } @@ -96,12 +111,16 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth_stencil) { struct cell_context *cell = cell_context(pipe); + struct cell_depth_stencil_alpha_state *cdsa = + (struct cell_depth_stencil_alpha_state *) depth_stencil; draw_flush(cell->draw); - cell->depth_stencil - = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; + if ((cdsa != NULL) && (cdsa->code.store == NULL)) { + cell_generate_depth_stencil_test(cdsa); + } + cell->depth_stencil = cdsa; cell->dirty |= CELL_NEW_DEPTH_STENCIL; } @@ -109,7 +128,11 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, static void cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) { - FREE(depth); + struct cell_depth_stencil_alpha_state *cdsa = + (struct cell_depth_stencil_alpha_state *) depth; + + spe_release_func(& cdsa->code); + FREE(cdsa); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 670eb26bdd0..4d589bcdbf9 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -71,9 +71,24 @@ cell_emit_state(struct cell_context *cell) } if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { - emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, - cell->depth_stencil, - sizeof(struct pipe_depth_stencil_alpha_state)); + struct cell_command_depth_stencil_alpha_test dsat; + + + if (cell->depth_stencil != NULL) { + dsat.base = (intptr_t) cell->depth_stencil->code.store; + dsat.size = (char *) cell->depth_stencil->code.csr + - (char *) cell->depth_stencil->code.store; + dsat.read_depth = TRUE; + dsat.read_stencil = 
FALSE; + } else { + dsat.base = 0; + dsat.size = 0; + dsat.read_depth = FALSE; + dsat.read_stencil = FALSE; + } + + emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, + sizeof(dsat)); } if (cell->dirty & CELL_NEW_SAMPLER) { diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c new file mode 100644 index 00000000000..9c479684596 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -0,0 +1,1075 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Generate code to perform all per-fragment operations. + * + * Code generated by these functions perform both alpha, depth, and stencil + * testing as well as alpha blending. 
+ * + * \note + * Occlusion query is not supported, but this is the right place to add that + * support. + * + * \author Ian Romanick <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "cell_context.h" + +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Generate code to perform alpha testing. + * + * The code generated by this function uses the register specified by + * \c mask as both an input and an output. + * + * \param dsa Current alpha-test state + * \param f Function to which code should be appended + * \param mask Index of register containing active fragment mask + * \param alphas Index of register containing per-fragment alpha values + * + * \note Emits a maximum of 6 instructions. + */ +static void +emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int alphas) +{ + /* If the alpha function is either NEVER or ALWAYS, there is no need to + * load the reference value into a register. ALWAYS is a fairly common + * case, and this optimization saves 2 instructions. 
+ */ + if (dsa->alpha.enabled + && (dsa->alpha.func != PIPE_FUNC_NEVER) + && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + int ref = spe_allocate_available_register(f); + int tmp_a = spe_allocate_available_register(f); + int tmp_b = spe_allocate_available_register(f); + union { + float f; + unsigned u; + } ref_val; + boolean complement = FALSE; + + ref_val.f = dsa->alpha.ref; + + spe_il(f, ref, ref_val.u & 0x0000ffff); + spe_ilh(f, ref, ref_val.u >> 16); /* NOTE(review): if spe_ilh emits the SPU "ilh" (which writes every halfword) it overwrites the spe_il result; an ilhu + iohl pair may have been intended -- confirm */ + + switch (dsa->alpha.func) { + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_EQUAL: + spe_fceq(f, tmp_a, ref, alphas); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GREATER: + spe_fcgt(f, tmp_a, ref, alphas); /* NOTE(review): operands are (ref, alphas), whereas emit_depth_test passes (calculated, stored) for GREATER; the comparison direction looks inverted -- confirm */ + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GEQUAL: + spe_fcgt(f, tmp_a, ref, alphas); + spe_fceq(f, tmp_b, ref, alphas); + spe_or(f, tmp_a, tmp_b, tmp_a); + break; + + case PIPE_FUNC_ALWAYS: + case PIPE_FUNC_NEVER: + default: + assert(0); + break; + } + + if (complement) { + spe_andc(f, mask, mask, tmp_a); + } else { + spe_and(f, mask, mask, tmp_a); + } + + spe_release_register(f, ref); + spe_release_register(f, tmp_a); + spe_release_register(f, tmp_b); + } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { + spe_il(f, mask, 0); + } +} + + +/** + * \param dsa Current depth-test state + * \param f Function to which code should be appended + * \param m Mask of allocated / free SPE registers + * \param mask Index of register to contain depth-pass mask + * \param stored Index of register containing values from depth buffer + * \param calculated Index of register containing per-fragment depth values + * + * \return + * If the calculated depth comparison mask is the actual mask, \c FALSE is + * returned. If the calculated depth comparison mask is the complement of + * the actual mask, \c TRUE is returned. + * + * \note Emits a maximum of 3 instructions. 
+ */ +static boolean +emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int stored, int calculated) +{ + unsigned func = (dsa->depth.enabled) + ? dsa->depth.func : PIPE_FUNC_ALWAYS; + int tmp = spe_allocate_available_register(f); + boolean compliment = FALSE; + + switch (func) { + case PIPE_FUNC_NEVER: + spe_il(f, mask, 0); + break; + + case PIPE_FUNC_NOTEQUAL: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_EQUAL: + spe_ceq(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LEQUAL: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GREATER: + spe_clgt(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LESS: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GEQUAL: + spe_clgt(f, mask, calculated, stored); + spe_ceq(f, tmp, calculated, stored); + spe_or(f, mask, mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + spe_il(f, mask, ~0); + break; + + default: + assert(0); + break; + } + + spe_release_register(f, tmp); + return compliment; +} + + +/** + * \note Emits a maximum of 5 instructions. + * + * \warning + * Since \c out and \c in might be the same register, this routine cannot + * generate code that uses \c out as a temporary. 
+ */ +static void +emit_stencil_op(struct spe_function *f, + int out, int in, int mask, unsigned op, unsigned ref) +{ + const int clamp = spe_allocate_available_register(f); + const int clamp_mask = spe_allocate_available_register(f); + const int result = spe_allocate_available_register(f); + + switch(op) { + case PIPE_STENCIL_OP_KEEP: + assert(0); /* NOTE(review): no break follows, so if this assert is compiled out KEEP falls through to the ZERO case -- confirm callers never pass KEEP */ + case PIPE_STENCIL_OP_ZERO: + spe_il(f, result, 0); + break; + case PIPE_STENCIL_OP_REPLACE: + spe_il(f, result, ref); + break; + case PIPE_STENCIL_OP_INCR: + spe_il(f, clamp, 0x0ff); + spe_ai(f, result, in, 1); + spe_clgti(f, clamp_mask, result, 0x0ff); + spe_selb(f, result, result, clamp, clamp_mask); + break; + case PIPE_STENCIL_OP_DECR: + spe_il(f, clamp, 0); + spe_ai(f, result, in, -1); + + /* If "(s-1) < 0" in signed arithmetic, then "(s-1) > MAX" in unsigned + * arithmetic. + */ + spe_clgti(f, clamp_mask, result, 0x0ff); + spe_selb(f, result, result, clamp, clamp_mask); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + spe_ai(f, result, in, 1); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + spe_ai(f, result, in, -1); + break; + case PIPE_STENCIL_OP_INVERT: + spe_nor(f, result, in, in); + break; + default: + assert(0); + } + + spe_selb(f, out, in, result, mask); + + spe_release_register(f, result); + spe_release_register(f, clamp_mask); + spe_release_register(f, clamp); +} + + +/** + * \param dsa Depth / stencil test state + * \param face 0 for front face, 1 for back face + * \param f Function to append instructions to + * \param reg_mask Mask of allocated registers + * \param mask Register containing mask of fragments passing the + * alpha test + * \param depth_mask Register containing mask of fragments passing the + * depth test + * \param depth_complement Is \c depth_mask the complement of the actual mask? 
+ * \param stencil Register containing values from stencil buffer + * \param depth_pass Register to store mask of fragments passing stencil test + * and depth test + * + * \note + * Emits a maximum of 10 + (3 * 5) = 25 instructions. + */ +static int +emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, + unsigned face, + struct spe_function *f, + int mask, + int depth_mask, + boolean depth_complement, + int stencil, + int depth_pass) +{ + int stencil_fail = spe_allocate_available_register(f); + int depth_fail = spe_allocate_available_register(f); + int stencil_mask = spe_allocate_available_register(f); + int stencil_pass = spe_allocate_available_register(f); + int face_stencil = spe_allocate_available_register(f); + int stencil_src = stencil; + const unsigned ref = (dsa->stencil[face].ref_value + & dsa->stencil[face].value_mask); + boolean complement = FALSE; + int stored; + int tmp = spe_allocate_available_register(f); + + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].value_mask != 0x0ff)) { + stored = spe_allocate_available_register(f); + spe_andi(f, stored, stencil, dsa->stencil[face].value_mask); + } else { + stored = stencil; + } + + + switch (dsa->stencil[face].func) { + case PIPE_FUNC_NEVER: + spe_il(f, stencil_mask, 0); + break; + + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_EQUAL: + spe_ceqi(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GREATER: + spe_clgti(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GEQUAL: + spe_clgti(f, stencil_mask, stored, ref); + spe_ceqi(f, tmp, stored, ref); + spe_or(f, stencil_mask, stencil_mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + /* See comment below. 
*/ + break; + + default: + assert(0); + break; + } + + if (stored != stencil) { + spe_release_register(f, stored); + } + spe_release_register(f, tmp); + + + /* ALWAYS is a very common stencil-test, so some effort is applied to + * optimize that case. The stencil-pass mask is the same as the input + * fragment mask. This makes the stencil-test (above) a no-op, and the + * input fragment mask can be "renamed" the stencil-pass mask. + */ + if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { + spe_release_register(f, stencil_pass); + stencil_pass = mask; + } else { + if (complement) { + spe_andc(f, stencil_pass, mask, stencil_mask); + } else { + spe_and(f, stencil_pass, mask, stencil_mask); + } + } + + if (depth_complement) { + spe_andc(f, depth_pass, stencil_pass, depth_mask); + } else { + spe_and(f, depth_pass, stencil_pass, depth_mask); + } + + + /* Conditionally emit code to update the stencil value under various + * condititons. Note that there is no need to generate code under the + * following circumstances: + * + * - Stencil write mask is zero. 
+ * - For stencil-fail if the stencil test is ALWAYS + * - For depth-fail if the stencil test is NEVER + * - For depth-pass if the stencil test is NEVER + * - Any of the 3 conditions if the operation is KEEP + */ + if (dsa->stencil[face].write_mask != 0) { + if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { + if (complement) { + spe_and(f, stencil_fail, mask, stencil_mask); + } else { + spe_andc(f, stencil_fail, mask, stencil_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, + dsa->stencil[face].fail_op, + dsa->stencil[face].ref_value); + + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { + if (depth_complement) { + spe_and(f, depth_fail, stencil_pass, depth_mask); + } else { + spe_andc(f, depth_fail, stencil_pass, depth_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, depth_fail, + dsa->stencil[face].zfail_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { + emit_stencil_op(f, face_stencil, stencil_src, depth_pass, + dsa->stencil[face].zpass_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + } + + spe_release_register(f, stencil_fail); + spe_release_register(f, depth_fail); + spe_release_register(f, stencil_mask); + if (stencil_pass != mask) { + spe_release_register(f, stencil_pass); + } + + /* If all of the stencil operations were KEEP or the stencil write mask was + * zero, "stencil_src" will still be set to "stencil". In this case + * release the "face_stencil" register. Otherwise apply the stencil write + * mask to select bits from the calculated stencil value and the previous + * stencil value. 
+ */ + if (stencil_src == stencil) { + spe_release_register(f, face_stencil); + } else if (dsa->stencil[face].write_mask != 0x0ff) { + int tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, dsa->stencil[face].write_mask); + spe_selb(f, stencil_src, stencil, stencil_src, tmp); + + spe_release_register(f, tmp); + } + + return stencil_src; +} + + +void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) +{ + struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; + struct spe_function *const f = &cdsa->code; + + /* This code generates a maximum of 6 (alpha test) + 3 (depth test) + * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round + * up to 64 to make it a happy power-of-two. + */ + spe_init_func(f, 4 * 64); + + + /* Allocate registers for the function's input parameters. Cleverly (and + * clever code is usually dangerous, but I couldn't resist) the generated + * function returns a structure. Returned structures start with register + * 3, and the structure fields are ordered to match up exactly with the + * input parameters. 
+ */ + int mask = spe_allocate_register(f, 3); + int depth = spe_allocate_register(f, 4); + int stencil = spe_allocate_register(f, 5); + int zvals = spe_allocate_register(f, 6); + int frag_a = spe_allocate_register(f, 7); + int facing = spe_allocate_register(f, 8); + + int depth_mask = spe_allocate_available_register(f); + + boolean depth_complement; + + + emit_alpha_test(dsa, f, mask, frag_a); + + depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); + + if (dsa->stencil[0].enabled) { + const int front_depth_pass = spe_allocate_available_register(f); + int front_stencil = emit_stencil_test(dsa, 0, f, mask, + depth_mask, depth_complement, + stencil, front_depth_pass); + + if (dsa->stencil[1].enabled) { + const int back_depth_pass = spe_allocate_available_register(f); + int back_stencil = emit_stencil_test(dsa, 1, f, mask, + depth_mask, depth_complement, + stencil, back_depth_pass); + + /* If the front facing stencil value and the back facing stencil + * value are stored in the same register, there is no need to select + * a value based on the facing. This can happen if the stencil value + * was not modified due to the write masks being zero, the stencil + * operations being KEEP, etc. 
+ */ + if (front_stencil != back_stencil) { + spe_selb(f, stencil, back_stencil, front_stencil, facing); + } + + if (back_stencil != stencil) { + spe_release_register(f, back_stencil); + } + + if (front_stencil != stencil) { + spe_release_register(f, front_stencil); + } + + spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); + + spe_release_register(f, back_depth_pass); + } else { + if (front_stencil != stencil) { + spe_or(f, stencil, front_stencil, front_stencil); + spe_release_register(f, front_stencil); + } + spe_or(f, mask, front_depth_pass, front_depth_pass); + } + + spe_release_register(f, front_depth_pass); + } else if (dsa->depth.enabled) { + if (depth_complement) { + spe_andc(f, mask, mask, depth_mask); + } else { + spe_and(f, mask, mask, depth_mask); + } + } + + if (dsa->depth.writemask) { + spe_selb(f, depth, depth, zvals, mask); + } + + spe_bi(f, 0, 0, 0); + + +#if 0 + { + const uint32_t *p = f->store; + unsigned i; + + printf("# alpha (%sabled)\n", + (dsa->alpha.enabled) ? "en" : "dis"); + printf("# func: %u\n", dsa->alpha.func); + printf("# ref: %.2f\n", dsa->alpha.ref); + + printf("# depth (%sabled)\n", + (dsa->depth.enabled) ? "en" : "dis"); + printf("# func: %u\n", dsa->depth.func); + + for (i = 0; i < 2; i++) { + printf("# %s stencil (%sabled)\n", + (i == 0) ? "front" : "back", + (dsa->stencil[i].enabled) ? 
"en" : "dis"); + + printf("# func: %u\n", dsa->stencil[i].func); + printf("# op (sf, zf, zp): %u %u %u\n", + dsa->stencil[i].fail_op, + dsa->stencil[i].zfail_op, + dsa->stencil[i].zpass_op); + printf("# ref value / value mask / write mask: %02x %02x %02x\n", + dsa->stencil[i].ref_value, + dsa->stencil[i].value_mask, + dsa->stencil[i].write_mask); + } + + printf("\t.text\n"); + for (/* empty */; p < f->csr; p++) { + printf("\t.long\t0x%04x\n", *p); + } + fflush(stdout); + } +#endif +} + + +/** + * \note Emits a maximum of 3 instructions + */ +static int +emit_alpha_factor_calculation(struct spe_function *f, + unsigned factor, float const_alpha, + int src_alpha, int dst_alpha) +{ + union { + float f; + unsigned u; + } alpha; + int factor_reg; + int tmp; + + + alpha.f = const_alpha; + + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor_reg = spe_allocate_available_register(f); + + spe_or(f, factor_reg, src_alpha, src_alpha); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor_reg = dst_alpha; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + const_alpha = 1.0 - const_alpha; + /* FALLTHROUGH */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor_reg = spe_allocate_available_register(f); + + spe_il(f, factor_reg, alpha.u & 0x0ffff); + spe_ilh(f, factor_reg, alpha.u >> 16); + break; + + case PIPE_BLENDFACTOR_ZERO: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, src_alpha); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, 
dst_alpha); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + factor_reg = -1; + break; + } + + return factor_reg; +} + + +/** + * \note Emits a maximum of 5 instructions + */ +static void +emit_color_factor_calculation(struct spe_function *f, + unsigned sF, unsigned mask, + const struct pipe_blend_color *blend_color, + const int *src, + const int *dst, + int *factor) +{ + union { + float f[4]; + unsigned u[4]; + } color; + int tmp; + unsigned i; + + + color.f[0] = blend_color->color[0]; + color.f[1] = blend_color->color[1]; + color.f[2] = blend_color->color[2]; + color.f[3] = blend_color->color[3]; + + factor[0] = -1; + factor[1] = -1; + factor[2] = -1; + factor[3] = -1; + + switch (sF) { + case PIPE_BLENDFACTOR_ONE: + break; + + case PIPE_BLENDFACTOR_SRC_COLOR: + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_or(f, factor[i], src[i], src[i]); + } + } + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_or(f, factor[0], src[3], src[3]); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor[0] = dst[3]; + factor[1] = dst[3]; + factor[2] = dst[3]; + break; + + case PIPE_BLENDFACTOR_DST_COLOR: + factor[0] = dst[0]; + factor[1] = dst[1]; + factor[2] = dst[2]; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + /* Alpha saturate means min(As, 1-Ad). 
+ */ + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, tmp, tmp, dst[3]); + spe_fcgt(f, factor[0], tmp, src[3]); + spe_selb(f, factor[0], src[3], tmp, factor[0]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + color.f[0] = 1.0 - color.f[0]; + color.f[1] = 1.0 - color.f[1]; + color.f[2] = 1.0 - color.f[2]; + /* FALLTHROUGH */ + case PIPE_BLENDFACTOR_CONST_COLOR: + for (i = 0; i < 3; i++) { + factor[i] = spe_allocate_available_register(f); + + spe_il(f, factor[i], color.u[i] & 0x0ffff); + spe_ilh(f, factor[i], color.u[i] >> 16); + } + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + color.f[3] = 1.0 - color.f[3]; + /* FALLTHROUGH */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, factor[0], color.u[3] & 0x0ffff); + spe_ilh(f, factor[0], color.u[3] >> 16); + break; + + case PIPE_BLENDFACTOR_ZERO: + break; + + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, src[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, src[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, dst[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_COLOR: + tmp = 
spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, dst[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + } +} + + +static void +emit_blend_calculation(struct spe_function *f, + unsigned func, unsigned sF, unsigned dF, + int src, int src_factor, int dst, int dst_factor) +{ + int tmp = spe_allocate_available_register(f); + + switch (func) { + case PIPE_BLEND_ADD: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. */ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fa(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fma(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. 
*/ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, dst); + } + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fms(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_REVERSE_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, src); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, dst, src); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } + } else { + spe_fm(f, tmp, src, src_factor); + spe_fms(f, src, src, dst_factor, tmp); + } + break; + + case PIPE_BLEND_MIN: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, dst, src, tmp); + break; + + case PIPE_BLEND_MAX: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, src, dst, tmp); + break; + + default: + assert(0); + } + + spe_release_register(f, tmp); +} + + +/** + * Generate code to perform alpha blending on the SPE + */ +void +cell_generate_alpha_blend(struct cell_blend_state *cb, + const struct pipe_blend_color *blend_color) +{ + struct pipe_blend_state *const b = &cb->base; + struct spe_function *const f = &cb->code; + + /* This code generates a maximum of 3 (source alpha factor) + * + 3 (destination alpha factor) + (3 * 5) (source color factor) + * + (3 * 5) (destination color factor) + (4 * 2) (blend equation) + * + 4 (fragment mask) + 1 (return) = 49 instlructions. Round up to 64 to + * make it a happy power-of-two. 
+ */ + spe_init_func(f, 4 * 64); + + + const int frag[4] = { + spe_allocate_register(f, 3), + spe_allocate_register(f, 4), + spe_allocate_register(f, 5), + spe_allocate_register(f, 6), + }; + const int pixel[4] = { + spe_allocate_register(f, 7), + spe_allocate_register(f, 8), + spe_allocate_register(f, 9), + spe_allocate_register(f, 10), + }; + const int mask = spe_allocate_register(f, 11); + unsigned func[4]; + unsigned sF[4]; + unsigned dF[4]; + unsigned i; + int src_factor[4]; + int dst_factor[4]; + + + /* Does the selected blend mode make use of the source / destination + * color (RGB) blend factors? + */ + boolean need_color_factor = b->blend_enable + && (b->rgb_func != PIPE_BLEND_MIN) + && (b->rgb_func != PIPE_BLEND_MAX); + + /* Does the selected blend mode make use of the source / destination + * alpha blend factors? + */ + boolean need_alpha_factor = b->blend_enable + && (b->alpha_func != PIPE_BLEND_MIN) + && (b->alpha_func != PIPE_BLEND_MAX); + + + sF[0] = b->rgb_src_factor; + sF[1] = sF[0]; + sF[2] = sF[0]; + sF[3] = (b->alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) + ? PIPE_BLENDFACTOR_ONE : b->alpha_src_factor; + + dF[0] = b->rgb_dst_factor; + dF[1] = dF[0]; + dF[2] = dF[0]; + dF[3] = b->rgb_dst_factor; + + + /* If alpha writing is enabled and the alpha blend mode requires use of + * the alpha factor, calculate the alpha factor. + */ + if (((b->colormask & 8) != 0) && need_alpha_factor) { + src_factor[3] = emit_alpha_factor_calculation(f, sF[3], + blend_color->color[3], + frag[3], pixel[3]); + + /* If the alpha destination blend factor is the same as the alpha source + * blend factor, re-use the previously calculated value. + */ + dst_factor[3] = (dF[3] == sF[3]) + ? 
src_factor[3] + : emit_alpha_factor_calculation(f, dF[3], + blend_color->color[3], + frag[3], pixel[3]); + } + + + if (sF[0] == sF[3]) { + src_factor[0] = src_factor[3]; + src_factor[1] = src_factor[3]; + src_factor[2] = src_factor[3]; + } else if (sF[0] == dF[3]) { + src_factor[0] = dst_factor[3]; + src_factor[1] = dst_factor[3]; + src_factor[2] = dst_factor[3]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_src_factor, + b->colormask, + blend_color, + frag, pixel, src_factor); + } + + + if (dF[0] == sF[3]) { + dst_factor[0] = src_factor[3]; + dst_factor[1] = src_factor[3]; + dst_factor[2] = src_factor[3]; + } else if (dF[0] == dF[3]) { + dst_factor[0] = dst_factor[3]; + dst_factor[1] = dst_factor[3]; + dst_factor[2] = dst_factor[3]; + } else if (dF[0] == sF[0]) { + dst_factor[0] = src_factor[0]; + dst_factor[1] = src_factor[1]; + dst_factor[2] = src_factor[2]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_dst_factor, + b->colormask, + blend_color, + frag, pixel, dst_factor); + } + + + + func[0] = b->rgb_func; + func[1] = func[0]; + func[2] = func[0]; + func[3] = b->alpha_func; + + for (i = 0; i < 4; ++i) { + if ((b->colormask & (1U << i)) != 0) { + emit_blend_calculation(f, + func[i], sF[i], dF[i], + frag[i], src_factor[i], + pixel[i], dst_factor[i]); + spe_selb(f, frag[i], pixel[i], frag[i], mask); + } else { + spe_or(f, frag[i], pixel[i], pixel[i]); + } + } + + spe_bi(f, 0, 0, 0); +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h new file mode 100644 index 00000000000..541c3b3be07 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h @@ -0,0 +1,35 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CELL_STATE_PER_FRAGMENT_H +#define CELL_STATE_PER_FRAGMENT_H + +extern void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); + +extern void +cell_generate_alpha_blend(struct cell_blend_state *cb, + const struct pipe_blend_color *blend_color); + +#endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index c071de1900b..115ca8cd901 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -19,6 +19,7 @@ SOURCES = \ spu_main.c \ spu_blend.c \ spu_dcache.c \ + spu_per_fragment_op.c \ spu_render.c \ spu_texture.c \ spu_tile.c \ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 59300028d4d..937962285d0 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -58,6 +58,9 @@ struct spu_vs_context draw; static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX] ALIGN16_ATTRIB; +static unsigned char depth_stencil_code_buffer[4 * 64] + ALIGN16_ATTRIB; + /** * Tell the PPU that this SPU has finished copying a buffer to * local store and that it may be reused by the PPU. 
@@ -215,12 +218,19 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; - if (spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM) + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: spu.fb.zsize = 4; - else if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) + break; + case PIPE_FORMAT_Z16_UNORM: spu.fb.zsize = 2; - else + break; + default: spu.fb.zsize = 0; + break; + } if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) spu.color_shuffle = ((vector unsigned char) { @@ -248,14 +258,35 @@ cmd_state_blend(const struct pipe_blend_state *state) static void -cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state) +cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state) { if (Debug) printf("SPU %u: DEPTH_STENCIL: ztest %d\n", spu.init.id, - state->depth.enabled); + state->read_depth); + + ASSERT_ALIGN16(state->base); + + if (state->size != 0) { + mfc_get(depth_stencil_code_buffer, + (unsigned int) state->base, /* src */ + ROUNDUP16(state->size), + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + } else { + /* If there is no code, emit a return instruction. 
+ */ + depth_stencil_code_buffer[0] = 0x35; + depth_stencil_code_buffer[1] = 0x00; + depth_stencil_code_buffer[2] = 0x00; + depth_stencil_code_buffer[3] = 0x00; + } - memcpy(&spu.depth_stencil, state, sizeof(*state)); + spu.frag_test = (frag_test_func) depth_stencil_code_buffer; + spu.read_depth = state->read_depth; + spu.read_stencil = state->read_stencil; } @@ -415,9 +446,9 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); break; case CELL_CMD_STATE_DEPTH_STENCIL: - cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) + cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8); break; case CELL_CMD_STATE_SAMPLER: cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index a13edd17029..444e2186452 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -56,6 +56,17 @@ typedef union { #define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ +struct spu_frag_test_results { + qword mask; + qword depth; + qword stencil; +}; + +typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask, + qword pixel_depth, qword pixel_stencil, qword frag_depth, + qword frag_alpha, qword facing); + + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ @@ -79,8 +90,9 @@ struct spu_global struct cell_init_info init; struct spu_framebuffer fb; - struct pipe_blend_state blend_stencil; - struct pipe_depth_stencil_alpha_state depth_stencil; + boolean read_depth; + boolean read_stencil; + frag_test_func frag_test; struct pipe_blend_state blend; struct pipe_sampler_state 
sampler[PIPE_MAX_SAMPLERS]; struct cell_command_texture texture; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c new file mode 100644 index 00000000000..b4cffeeb32a --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -0,0 +1,211 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file spu_per_fragment_op.c + * SPU implementation various per-fragment operations. 
+ * + * \author Ian Romanick <[email protected]> + */ + +#include "pipe/p_format.h" +#include "spu_main.h" +#include "spu_per_fragment_op.h" + +#define ZERO 0x80 + +static void +read_ds_quad(tile_t *buffer, unsigned x, unsigned y, + enum pipe_format depth_format, qword *depth, + qword *stencil) +{ + const int ix = x / 2; + const int iy = y / 2; + + switch (depth_format) { + case PIPE_FORMAT_Z16_UNORM: { + qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; + + const qword shuf_vec = (qword) { + ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, + ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7 + }; + + + /* At even X values we want the first 4 shorts, and at odd X values we + * want the second 4 shorts. + */ + qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3)); + qword bias_mask = si_fsmbi(0x3333); + qword sv = si_a(shuf_vec, si_and(bias_mask, bias)); + + *depth = si_shufb(*ptr, *ptr, sv); + *stencil = si_il(0); + break; + } + + + case PIPE_FORMAT_Z32_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + + *depth = *ptr; + *stencil = si_il(0); + break; + } + + + case PIPE_FORMAT_Z24S8_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword mask = si_fsmbi(0xEEEE); + + *depth = si_rotmai(si_and(*ptr, mask), -8); + *stencil = si_andc(*ptr, mask); + break; + } + + + case PIPE_FORMAT_S8Z24_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + + *depth = si_and(*ptr, si_fsmbi(0x7777)); + *stencil = si_andi(si_roti(*ptr, 8), 0x0ff); + break; + } + + + default: + assert(0); + break; + } +} + + +static void +write_ds_quad(tile_t *buffer, unsigned x, unsigned y, + enum pipe_format depth_format, + qword depth, qword stencil) +{ + const int ix = x / 2; + const int iy = y / 2; + + (void) stencil; + + switch (depth_format) { + case PIPE_FORMAT_Z16_UNORM: { + qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; + + qword sv = ((ix & 0x01) == 0) + ? 
(qword) { 2, 3, 6, 7, 10, 11, 14, 15, + 24, 25, 26, 27, 28, 29, 30, 31 } + : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23, + 2, 3, 6, 7, 10, 11, 14, 15 }; + *ptr = si_shufb(depth, *ptr, sv); + break; + } + + + case PIPE_FORMAT_Z32_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + *ptr = depth; + break; + } + + + case PIPE_FORMAT_Z24S8_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword mask = si_fsmbi(0xEEEE); + + depth = si_shli(depth, 8); + *ptr = si_selb(stencil, depth, mask); + break; + } + + + case PIPE_FORMAT_S8Z24_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword mask = si_fsmbi(0x7777); + + stencil = si_shli(stencil, 24); + *ptr = si_selb(stencil, depth, mask); + break; + } + + + default: + assert(0); + break; + } +} + + +qword +spu_do_depth_stencil(int x, int y, + qword frag_mask, qword frag_depth, qword frag_alpha, + qword facing) +{ + struct spu_frag_test_results result; + qword pixel_depth; + qword pixel_stencil; + + /* All of this preable code (everthing before the call to frag_test) should + * be generated on the PPU and upload to the SPU. 
+ */ + if (spu.read_depth || spu.read_stencil) { + read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, + &pixel_depth, &pixel_stencil); + } + + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z16_UNORM: + frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu))); + frag_depth = si_cfltu(frag_depth, 0); + break; + case PIPE_FORMAT_Z32_UNORM: + frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu))); + frag_depth = si_cfltu(frag_depth, 0); + break; + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu))); + frag_depth = si_cfltu(frag_depth, 0); + break; + default: + ASSERT(0); + break; + } + + result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil, + frag_depth, frag_alpha, facing); + + + /* This code (everthing after the call to frag_test) should + * be generated on the PPU and upload to the SPU. + */ + if (spu.read_depth || spu.read_stencil) { + write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, + result.depth, result.stencil); + } + + return result.mask; +} diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h new file mode 100644 index 00000000000..65712586992 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -0,0 +1,32 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SPU_PER_FRAGMENT_OP +#define SPU_PER_FRAGMENT_OP + +extern qword +spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth, + qword frag_alpha, qword facing); + +#endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 20e77aa2e63..6df59abd36d 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -98,7 +98,7 @@ my_tile(uint tx, uint ty) static INLINE void get_cz_tiles(uint tx, uint ty) { - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); @@ -153,7 +153,7 @@ static INLINE void wait_put_cz_tiles(void) { wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { wait_on_mask(1 << TAG_WRITE_TILE_Z); } } diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h index 3105b848fdc..1b5491112db 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -56,13 +56,13 @@ clear_c_tile(tile_t *ctile) static INLINE void clear_z_tile(tile_t *ztile) { - if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { + if (spu.fb.zsize == 2) { memset16((ushort*) ztile->us, spu.fb.depth_clear_value, TILE_SIZE * TILE_SIZE); } else { - ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); + ASSERT(spu.fb.zsize != 0); memset32((uint*) ztile->ui, spu.fb.depth_clear_value, TILE_SIZE * TILE_SIZE); diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index be9624cf7d9..81823f24633 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -38,8 +38,7 @@ #include "spu_texture.h" #include "spu_tile.h" #include "spu_tri.h" - -#include "spu_ztest.h" +#include 
"spu_per_fragment_op.h" /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ @@ -264,16 +263,12 @@ do_depth_test(int x, int y, mask_t quadmask) zvals.v = eval_z((float) x, (float) y); - if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - int ix = (x - setup.cliprect_minx) / 4; - int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask); - } - else { - int ix = (x - setup.cliprect_minx) / 2; - int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask); - } + mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx, + y - setup.cliprect_miny, + (qword) quadmask, + (qword) zvals.v, + (qword) spu_splats((unsigned char) 0x0ffu), + (qword) spu_splats((unsigned int) 0x01u)); if (spu_extract(spu_orx(mask), 0)) spu.cur_ztile_status = TILE_STATUS_DIRTY; @@ -299,7 +294,7 @@ emit_quad( int x, int y, mask_t mask ) sp->quad.first->run(sp->quad.first, &setup.quad); #else - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { mask = do_depth_test(x, y, mask); } @@ -434,7 +429,7 @@ static void flush_spans( void ) } ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { if (spu.cur_ztile_status == TILE_STATUS_GETTING) { /* wait for mfc_get() to complete */ //printf("SPU: %u: waiting for ztile\n", spu.init.id); diff --git a/src/gallium/drivers/cell/spu/spu_ztest.h b/src/gallium/drivers/cell/spu/spu_ztest.h deleted file mode 100644 index ce8ad003393..00000000000 --- a/src/gallium/drivers/cell/spu/spu_ztest.h +++ /dev/null @@ -1,135 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Zbuffer/depth test code. - */ - - -#ifndef SPU_ZTEST_H -#define SPU_ZTEST_H - - -#ifdef __SPU__ -#include <spu_intrinsics.h> -#endif - - - -/** - * Perform Z testing for a 16-bit/value Z buffer. - * - * \param zvals vector of four fragment zvalues as floats - * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this - * contains the Z values for 2 quads, 8 pixels. 
- * \param x x coordinate of quad (only lsbit is significant) - * \param inMask indicates which fragments in the quad are alive - * \return new mask indicating which fragments are alive after ztest - */ -static INLINE vector unsigned int -spu_z16_test_less(vector float zvals, vector unsigned short *zbuf, - uint x, vector unsigned int inMask) -{ -#define ZERO 0x80 - vector unsigned int zvals_ui4, zbuf_ui4, mask; - - /* convert floats to uints in [0, 65535] */ - zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */ - zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */ - - /* XXX this conditional could be removed with a bit of work */ - if (x & 1) { - /* convert zbuffer values from ushorts to uints */ - /* gather lower four ushorts */ - zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, - (vector unsigned int) *zbuf, - ((vector unsigned char) { - ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11, - ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15})); - /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ - mask = spu_cmpgt(zbuf_ui4, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); - /* convert zbuffer values from uints back to ushorts, preserve lower 4 */ - *zbuf = (vector unsigned short) - spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, - ((vector unsigned char) { - 16, 17, 18, 19, 20, 21, 22, 23, - 2, 3, 6, 7, 10, 11, 14, 15})); - } - else { - /* convert zbuffer values from ushorts to uints */ - /* gather upper four ushorts */ - zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, - (vector unsigned int) *zbuf, - ((vector unsigned char) { - ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, - ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7})); - /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ - mask = spu_cmpgt(zbuf_ui4, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? 
zval : zbuf */ - zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); - /* convert zbuffer values from uints back to ushorts, preserve upper 4 */ - *zbuf = (vector unsigned short) - spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, - ((vector unsigned char) { - 2, 3, 6, 7, 10, 11, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31})); - } - return mask; -#undef ZERO -} - - -/** - * As above, but Zbuffer values as 32-bit uints - */ -static INLINE vector unsigned int -spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr, - vector unsigned int inMask) -{ - vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr; - - /* convert floats to uints in [0, 0xffffffff] */ - zvals_ui4 = spu_convtu(zvals, 32); - /* mask = (zbuf < zvals_ui4) ? ~0 : 0 */ - mask = spu_cmpgt(zbuf, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask); - - return mask; -} - - -#endif /* SPU_ZTEST_H */ diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 9d5f609220a..eb64f51943b 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -116,7 +116,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, } -static void +static boolean i915_vbuf_render_set_primitive( struct vbuf_render *render, unsigned prim ) { @@ -125,15 +125,17 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, switch(prim) { case PIPE_PRIM_POINTS: i915_render->hwprim = PRIM3D_POINTLIST; - break; + return TRUE; case PIPE_PRIM_LINES: i915_render->hwprim = PRIM3D_LINELIST; - break; + return TRUE; case PIPE_PRIM_TRIANGLES: i915_render->hwprim = PRIM3D_TRILIST; - break; + return TRUE; default: - assert(0); + /* Actually, can handle a lot more just fine... Fixme. 
+ */ + return FALSE; } } diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index db0913cb2bc..d940718ed2b 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -101,11 +101,20 @@ sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, } -static void +static boolean sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - cvbr->prim = prim; + if (prim == PIPE_PRIM_TRIANGLES || + prim == PIPE_PRIM_LINES || + prim == PIPE_PRIM_POINTS) { + cvbr->prim = prim; + return TRUE; + } + else { + return FALSE; + } + } @@ -207,6 +216,27 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) (struct vertex_header *) ((char *) vertex_buffer + (I) * vertex_size) switch (cvbr->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + prim.v[0] = VERTEX(i); + setup->point( setup, &prim ); + } + break; + case PIPE_PRIM_LINES: + assert(nr % 2 == 0); + for (i = 0; i < nr; i += 2) { + prim.v[0] = VERTEX(i); + prim.v[1] = VERTEX(i + 1); + setup->line( setup, &prim ); + } + break; + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i++) { + prim.v[0] = VERTEX(i - 1); + prim.v[1] = VERTEX(i); + setup->line( setup, &prim ); + } + break; case PIPE_PRIM_TRIANGLES: assert(nr % 3 == 0); for (i = 0; i < nr; i += 3) { @@ -217,6 +247,58 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) setup->tri( setup, &prim ); } break; + case PIPE_PRIM_TRIANGLE_STRIP: + assert(nr >= 3); + for (i = 2; i < nr; i++) { + prim.v[0] = VERTEX(i - 2); + prim.v[1] = VERTEX(i - 1); + prim.v[2] = VERTEX(i); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + case PIPE_PRIM_TRIANGLE_FAN: + assert(nr >= 3); + for (i = 2; i < nr; i++) { + prim.v[0] = VERTEX(0); + prim.v[1] = VERTEX(i - 1); + prim.v[2] = VERTEX(i); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + case 
PIPE_PRIM_QUADS: + assert(nr % 4 == 0); + for (i = 0; i < nr; i += 4) { + prim.v[0] = VERTEX(i + 0); + prim.v[1] = VERTEX(i + 1); + prim.v[2] = VERTEX(i + 2); + calc_det(&prim); + setup->tri( setup, &prim ); + + prim.v[0] = VERTEX(i + 0); + prim.v[1] = VERTEX(i + 2); + prim.v[2] = VERTEX(i + 3); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + assert(nr >= 4); + for (i = 2; i < nr; i += 2) { + prim.v[0] = VERTEX(i - 2); + prim.v[1] = VERTEX(i); + prim.v[2] = VERTEX(i + 1); + calc_det(&prim); + setup->tri( setup, &prim ); + + prim.v[0] = VERTEX(i - 2); + prim.v[1] = VERTEX(i + 1); + prim.v[2] = VERTEX(i - 1); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; case PIPE_PRIM_POLYGON: /* draw as tri fan */ for (i = 2; i < nr; i++) { diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index f971ad3adc0..f3dfa062161 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -158,6 +158,16 @@ void debug_mask_vprintf(uint32_t uuid, const char *format, va_list ap); + +#ifdef DEBUG +#define debug_warning(__msg) \ + debug_printf("%s:%i:warning: %s\n", __FILE__, __LINE__, (__msg)) +#else +#define debug_warning(__msg) \ + ((void)0) +#endif + + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index ef36ce75f7d..c2e0f8c6a5d 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -88,14 +88,16 @@ FREE( void *ptr ) static INLINE void * REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) { - void *new_ptr; - if( new_size <= old_size ) { - return old_ptr; - } - new_ptr = MALLOC( new_size ); - if( new_ptr ) { - memcpy( new_ptr, old_ptr, old_size ); + void *new_ptr = NULL; + + if (new_size != 0) { + new_ptr = MALLOC( new_size ); + + if( new_ptr && old_ptr ) { + memcpy( new_ptr, old_ptr, old_size ); + } } + FREE( old_ptr ); return new_ptr; } diff --git 
a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index e385b9d9973..9e5169eff17 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -1576,9 +1576,6 @@ parse_attrib_binding(GLcontext * ctx, const GLubyte ** inst, if (err) { program_error(ctx, Program->Position, "Bad attribute binding"); } - else { - Program->Base.InputsRead |= (1 << *inputReg); - } return err; } @@ -2557,6 +2554,11 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst, return 1; } + /* Add attributes to InputsRead only if they are used the program. + * This avoids the handling of unused ATTRIB declarations in the drivers. */ + if (*File == PROGRAM_INPUT) + Program->Base.InputsRead |= (1 << *Index); + return 0; } diff --git a/src/mesa/sources b/src/mesa/sources index e3d5f228493..287af7121aa 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -172,6 +172,7 @@ STATETRACKER_SOURCES = \ state_tracker/st_atom_texture.c \ state_tracker/st_atom_viewport.c \ state_tracker/st_cb_accum.c \ + state_tracker/st_cb_blit.c \ state_tracker/st_cb_bufferobjects.c \ state_tracker/st_cb_clear.c \ state_tracker/st_cb_flush.c \ diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c new file mode 100644 index 00000000000..dfa79c975c1 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -0,0 +1,125 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" +#include "main/texformat.h" +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "st_context.h" +#include "st_program.h" +#include "st_cb_drawpixels.h" +#include "st_cb_blit.h" +#include "st_cb_fbo.h" + +#include "util/u_blit.h" + +#include "cso_cache/cso_context.h" + + +void +st_init_blit(struct st_context *st) +{ + st->blit = util_create_blit(st->pipe); +} + + +void +st_destroy_blit(struct st_context *st) +{ + util_destroy_blit(st->blit); + st->blit = NULL; +} + + +static void +st_BlitFramebuffer(GLcontext *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + + const uint pFilter = ((filter == GL_NEAREST) + ? PIPE_TEX_MIPFILTER_NEAREST + : PIPE_TEX_MIPFILTER_LINEAR); + + if (mask & GL_COLOR_BUFFER_BIT) { + struct st_renderbuffer *srcRb = + st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + struct st_renderbuffer *dstRb = + st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0][0]); + struct pipe_surface *srcSurf = srcRb->surface; + struct pipe_surface *dstSurf = dstRb->surface; + + srcY0 = srcRb->Base.Height - srcY0; + srcY1 = srcRb->Base.Height - srcY1; + + dstY0 = dstRb->Base.Height - dstY0; + dstY1 = dstRb->Base.Height - dstY1; + + util_blit_pixels(st->blit, + srcSurf, srcX0, srcY0, srcX1, srcY1, + dstSurf, dstX0, dstY0, dstX1, dstY1, + 0.0, pFilter); + + } + +#if 0 + /* XXX is this sufficient? */ + st_invalidate_state(ctx, _NEW_COLOR | _NEW_TEXTURE); +#else + /* need to "unset" cso state because we went behind the back of the cso + * tracker. Without unset, the _set_ calls would be no-ops. 
+ */ + cso_unset_blend(st->cso_context); + cso_unset_depth_stencil_alpha(st->cso_context); + cso_unset_rasterizer(st->cso_context); + cso_set_blend(st->cso_context, &st->state.blend); + cso_set_depth_stencil_alpha(st->cso_context, &st->state.depth_stencil); + cso_set_rasterizer(st->cso_context, &st->state.rasterizer); + pipe->bind_fs_state(pipe, st->fp->driver_shader); + pipe->bind_vs_state(pipe, st->vp->driver_shader); +#endif +} + + + +void +st_init_blit_functions(struct dd_function_table *functions) +{ + functions->BlitFramebuffer = st_BlitFramebuffer; +} diff --git a/src/mesa/state_tracker/st_cb_blit.h b/src/mesa/state_tracker/st_cb_blit.h new file mode 100644 index 00000000000..ed22986b531 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_blit.h @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef ST_CB_BLIT_H +#define ST_CB_BLIT_H + + +#include "st_context.h" + + + +extern void +st_init_blit(struct st_context *st); + +extern void +st_destroy_blit(struct st_context *st); + +extern void +st_init_blit_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_BLIT_H */ diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 58650714398..693cddedf75 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -48,6 +48,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" +#include "util/u_pack_color.h" #include "cso_cache/cso_context.h" @@ -56,55 +57,6 @@ #define TEST_DRAW_PASSTHROUGH 0 -static GLuint -color_value(enum pipe_format pipeFormat, const GLfloat color[4]) -{ - GLubyte r, g, b, a; - - UNCLAMPED_FLOAT_TO_UBYTE(r, color[0]); - UNCLAMPED_FLOAT_TO_UBYTE(g, color[1]); - UNCLAMPED_FLOAT_TO_UBYTE(b, color[2]); - UNCLAMPED_FLOAT_TO_UBYTE(a, color[3]); - - switch (pipeFormat) { - case PIPE_FORMAT_R8G8B8A8_UNORM: - return (r << 24) | (g << 16) | (b << 8) | a; - case PIPE_FORMAT_A8R8G8B8_UNORM: - return (a << 24) | (r << 16) | (g << 8) | b; - case PIPE_FORMAT_B8G8R8A8_UNORM: - return (b << 24) | (g << 16) | (r << 8) | a; - case PIPE_FORMAT_R5G6B5_UNORM: - return ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); - default: - assert(0); - return 0; - } -} - - -static uint -depth_value(enum pipe_format pipeFormat, GLfloat value) -{ - switch (pipeFormat) { - case PIPE_FORMAT_Z16_UNORM: - return (uint) (value * 0xffff); - case PIPE_FORMAT_Z32_UNORM: - /* special-case to avoid overflow 
*/ - if (value == 1.0) - return 0xffffffff; - else - return (uint) (value * 0xffffffff); - case PIPE_FORMAT_S8Z24_UNORM: - return (uint) (value * 0xffffff); - case PIPE_FORMAT_Z24S8_UNORM: - return ((uint) (value * 0xffffff)) << 8; - default: - assert(0); - return 0; - } -} - - static GLboolean is_depth_stencil_format(enum pipe_format pipeFormat) { @@ -405,6 +357,8 @@ clear_with_quad(GLcontext *ctx, st_invalidate_state(ctx, _NEW_COLOR | _NEW_DEPTH | _NEW_STENCIL); #else /* Restore pipe state */ + cso_set_blend(st->cso_context, &st->state.blend); + cso_set_depth_stencil_alpha(st->cso_context, &st->state.depth_stencil); cso_set_rasterizer(st->cso_context, &st->state.rasterizer); pipe->bind_fs_state(pipe, st->fp->driver_shader); pipe->bind_vs_state(pipe, st->vp->driver_shader); @@ -518,7 +472,6 @@ check_clear_stencil_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb) - static void clear_color_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) { @@ -527,10 +480,10 @@ clear_color_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) clear_with_quad(ctx, GL_TRUE, GL_FALSE, GL_FALSE); } else { - struct st_renderbuffer *strb = st_renderbuffer(rb); - /* clear whole buffer w/out masking */ - uint clearValue = color_value(strb->surface->format, ctx->Color.ClearColor); + struct st_renderbuffer *strb = st_renderbuffer(rb); + uint clearValue; + util_pack_color(ctx->Color.ClearColor, strb->surface->format, &clearValue); ctx->st->pipe->clear(ctx->st->pipe, strb->surface, clearValue); } } @@ -547,7 +500,7 @@ clear_depth_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) struct st_renderbuffer *strb = st_renderbuffer(rb); /* simple clear of whole buffer */ - uint clearValue = depth_value(strb->surface->format, ctx->Depth.Clear); + uint clearValue = util_pack_z(strb->surface->format, ctx->Depth.Clear); ctx->st->pipe->clear(ctx->st->pipe, strb->surface, clearValue); } } @@ -591,7 +544,7 @@ clear_depth_stencil_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) struct st_renderbuffer 
*strb = st_renderbuffer(rb); /* clear whole buffer w/out masking */ - GLuint clearValue = depth_value(strb->surface->format, ctx->Depth.Clear); + GLuint clearValue = util_pack_z(strb->surface->format, ctx->Depth.Clear); switch (strb->surface->format) { case PIPE_FORMAT_S8Z24_UNORM: diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 5458ab420e4..e1fc885e0e7 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -36,6 +36,7 @@ #include "st_context.h" #include "st_cb_accum.h" #include "st_cb_bufferobjects.h" +#include "st_cb_blit.h" #include "st_cb_clear.h" #include "st_cb_drawpixels.h" #include "st_cb_fbo.h" @@ -100,6 +101,7 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) st_init_atoms( st ); st_init_draw( st ); st_init_generate_mipmap(st); + st_init_blit(st); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) st->state.sampler_list[i] = &st->state.samplers[i]; @@ -151,6 +153,8 @@ static void st_destroy_context_priv( struct st_context *st ) draw_destroy(st->draw); st_destroy_atoms( st ); st_destroy_draw( st ); + st_destroy_generate_mipmap(st); + st_destroy_blit(st); _vbo_DestroyContext(st->ctx); @@ -217,6 +221,7 @@ void st_init_driver_functions(struct dd_function_table *functions) st_init_accum_functions(functions); st_init_bufferobject_functions(functions); + st_init_blit_functions(functions); st_init_clear_functions(functions); st_init_drawpixels_functions(functions); st_init_fbo_functions(functions); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index e81aebba3d2..63150dbeaf6 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -40,6 +40,9 @@ struct draw_context; struct draw_stage; struct cso_cache; struct cso_blend; +struct gen_mipmap_state; +struct blit_state; + #define ST_NEW_MESA 0x1 /* Mesa state has changed */ #define ST_NEW_FRAGMENT_PROGRAM 0x2 @@ -146,18 +149,8 @@ struct st_context 
struct st_fragment_program *combined_prog; } bitmap; - /** For gen/render mipmap feature */ - struct { - struct pipe_blend_state blend; - struct pipe_depth_stencil_alpha_state depthstencil; - struct pipe_rasterizer_state rasterizer; - - void *blend_cso; - void *depthstencil_cso; - void *rasterizer_cso; - struct st_fragment_program *stfp; - struct st_vertex_program *stvp; - } gen_mipmap; + struct gen_mipmap_state *gen_mipmap; + struct blit_state *blit; struct cso_context *cso_context; }; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 99d2a5fb9e9..0962b5f74c5 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -143,6 +143,7 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_blend_logic_op = GL_TRUE; ctx->Extensions.EXT_blend_minmax = GL_TRUE; ctx->Extensions.EXT_blend_subtract = GL_TRUE; + ctx->Extensions.EXT_framebuffer_blit = GL_TRUE; ctx->Extensions.EXT_framebuffer_object = GL_TRUE; ctx->Extensions.EXT_fog_coord = GL_TRUE; ctx->Extensions.EXT_multi_draw_arrays = GL_TRUE; diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 9c4e1032efe..6c3afca1ba1 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -37,6 +37,8 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" +#include "util/u_gen_mipmap.h" + #include "cso_cache/cso_cache.h" #include "cso_cache/cso_context.h" @@ -49,55 +51,6 @@ #include "st_cb_texture.h" - -static struct st_fragment_program * -make_tex_fragment_program(GLcontext *ctx) -{ - struct st_fragment_program *stfp; - struct gl_program *p; - GLuint ic = 0; - - p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); - if (!p) - return NULL; - - p->NumInstructions = 2; - - p->Instructions = _mesa_alloc_instructions(p->NumInstructions); - if (!p->Instructions) { - ctx->Driver.DeleteProgram(ctx, p); - return 
NULL; - } - _mesa_init_instructions(p->Instructions, p->NumInstructions); - - /* TEX result.color, fragment.texcoord[0], texture[0], 2D; */ - p->Instructions[ic].Opcode = OPCODE_TEX; - p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; - p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLR; - p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; - p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; - p->Instructions[ic].TexSrcUnit = 0; - p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; - - /* END; */ - p->Instructions[ic++].Opcode = OPCODE_END; - - assert(ic == p->NumInstructions); - - p->InputsRead = FRAG_BIT_TEX0; - p->OutputsWritten = (1 << FRAG_RESULT_COLR); - - stfp = (struct st_fragment_program *) p; - - st_translate_fragment_program(ctx->st, stfp, NULL); - - return stfp; -} - - - - /** * one-time init for generate mipmap * XXX Note: there may be other times we need no-op/simple state like this. @@ -106,117 +59,18 @@ make_tex_fragment_program(GLcontext *ctx) void st_init_generate_mipmap(struct st_context *st) { - struct pipe_context *pipe = st->pipe; - struct pipe_blend_state blend; - struct pipe_rasterizer_state rasterizer; - struct pipe_depth_stencil_alpha_state depthstencil; - - /* we don't use blending, but need to set valid values */ - memset(&blend, 0, sizeof(blend)); - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.colormask = PIPE_MASK_RGBA; - st->gen_mipmap.blend = blend; - st->gen_mipmap.blend_cso = pipe->create_blend_state(pipe, &blend); - - memset(&depthstencil, 0, sizeof(depthstencil)); - st->gen_mipmap.depthstencil_cso = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); - - /* Note: we're assuming zero is valid for all non-specified fields */ - memset(&rasterizer, 0, sizeof(rasterizer)); - rasterizer.front_winding = PIPE_WINDING_CW; - rasterizer.cull_mode = 
PIPE_WINDING_NONE; - st->gen_mipmap.rasterizer_cso = pipe->create_rasterizer_state(pipe, &rasterizer); - - st->gen_mipmap.stfp = make_tex_fragment_program(st->ctx); - st->gen_mipmap.stvp = st_make_passthrough_vertex_shader(st, GL_FALSE); + st->gen_mipmap = util_create_gen_mipmap(st->pipe); } void -st_destroy_generate_mipmpap(struct st_context *st) -{ - struct pipe_context *pipe = st->pipe; - - pipe->delete_blend_state(pipe, st->gen_mipmap.blend_cso); - pipe->delete_depth_stencil_alpha_state(pipe, st->gen_mipmap.depthstencil_cso); - pipe->delete_rasterizer_state(pipe, st->gen_mipmap.rasterizer_cso); - - /* XXX free stfp, stvp */ -} - - -static void -simple_viewport(struct pipe_context *pipe, uint width, uint height) +st_destroy_generate_mipmap(struct st_context *st) { - struct pipe_viewport_state vp; - - vp.scale[0] = 0.5 * width; - vp.scale[1] = -0.5 * height; - vp.scale[2] = 1.0; - vp.scale[3] = 1.0; - vp.translate[0] = 0.5 * width; - vp.translate[1] = 0.5 * height; - vp.translate[2] = 0.0; - vp.translate[3] = 0.0; - - pipe->set_viewport_state(pipe, &vp); + util_destroy_gen_mipmap(st->gen_mipmap); + st->gen_mipmap = NULL; } - -/* - * Draw simple [-1,1]x[-1,1] quad - */ -static void -draw_quad(GLcontext *ctx) -{ - GLfloat verts[4][2][4]; /* four verts, two attribs, XYZW */ - GLuint i; - GLfloat sLeft = 0.0, sRight = 1.0; - GLfloat tTop = 1.0, tBot = 0.0; - GLfloat x0 = -1.0, x1 = 1.0; - GLfloat y0 = -1.0, y1 = 1.0; - - /* upper-left */ - verts[0][0][0] = x0; /* attr[0].x */ - verts[0][0][1] = y0; /* attr[0].y */ - verts[0][1][0] = sLeft; /* attr[1].s */ - verts[0][1][1] = tTop; /* attr[1].t */ - - /* upper-right */ - verts[1][0][0] = x1; - verts[1][0][1] = y0; - verts[1][1][0] = sRight; - verts[1][1][1] = tTop; - - /* lower-right */ - verts[2][0][0] = x1; - verts[2][0][1] = y1; - verts[2][1][0] = sRight; - verts[2][1][1] = tBot; - - /* lower-left */ - verts[3][0][0] = x0; - verts[3][0][1] = y1; - verts[3][1][0] = sLeft; - verts[3][1][1] = tBot; - - /* same for all 
verts: */ - for (i = 0; i < 4; i++) { - verts[i][0][2] = 0.0; /*Z*/ - verts[i][0][3] = 1.0; /*W*/ - verts[i][1][2] = 0.0; /*R*/ - verts[i][1][3] = 1.0; /*Q*/ - } - - st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2, GL_TRUE); -} - - - /** * Generate mipmap levels using hardware rendering. * \return TRUE if successful, FALSE if not possible @@ -229,12 +83,7 @@ st_render_mipmap(struct st_context *st, { struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_framebuffer_state fb; - struct pipe_sampler_state sampler; - void *sampler_cso; - const uint face = _mesa_tex_target_to_face(target), zslice = 0; - /*const uint first_level_save = pt->first_level;*/ - uint dstLevel; + const uint face = _mesa_tex_target_to_face(target); assert(target != GL_TEXTURE_3D); /* not done yet */ @@ -243,66 +92,7 @@ st_render_mipmap(struct st_context *st, return FALSE; } - /* init framebuffer state */ - memset(&fb, 0, sizeof(fb)); - fb.num_cbufs = 1; - - /* sampler state */ - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.normalized_coords = 1; - - - /* bind state */ - cso_set_blend(st->cso_context, &st->gen_mipmap.blend); - cso_set_depth_stencil_alpha(st->cso_context, &st->gen_mipmap.depthstencil); - cso_set_rasterizer(st->cso_context, &st->gen_mipmap.rasterizer); - - /* bind shaders */ - pipe->bind_fs_state(pipe, st->gen_mipmap.stfp->driver_shader); - pipe->bind_vs_state(pipe, st->gen_mipmap.stvp->driver_shader); - - /* - * XXX for small mipmap levels, it may be faster to use the software - * fallback path... 
- */ - for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { - const uint srcLevel = dstLevel - 1; - - /* - * Setup framebuffer / dest surface - */ - fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); - pipe->set_framebuffer_state(pipe, &fb); - - /* - * Setup sampler state - */ - sampler.min_lod = sampler.max_lod = srcLevel; - sampler_cso = pipe->create_sampler_state(pipe, &sampler); - pipe->bind_sampler_states(pipe, 1, &sampler_cso); - - simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); - - /* - * Setup src texture, override pt->first_level so we sample from - * the right mipmap level. - */ - /*pt->first_level = srcLevel;*/ - pipe->set_sampler_textures(pipe, 1, &pt); - - draw_quad(st->ctx); - - pipe->delete_sampler_state(pipe, sampler_cso); - } - - /* restore first_level */ - /*pt->first_level = first_level_save;*/ + util_gen_mipmap(st->gen_mipmap, pt, face, baseLevel, lastLevel); /* restore pipe state */ #if 0 diff --git a/src/mesa/state_tracker/st_gen_mipmap.h b/src/mesa/state_tracker/st_gen_mipmap.h index 7668c1e44e3..00fbae93026 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.h +++ b/src/mesa/state_tracker/st_gen_mipmap.h @@ -35,7 +35,7 @@ st_init_generate_mipmap(struct st_context *st); extern void -st_destroy_generate_mipmpap(struct st_context *st); +st_destroy_generate_mipmap(struct st_context *st); extern void diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c index ad4cc62d5f7..a180441a5a6 100644 --- a/src/mesa/tnl/t_vertex_sse.c +++ b/src/mesa/tnl/t_vertex_sse.c @@ -348,8 +348,6 @@ static GLboolean build_vertex_emit( struct x86_program *p ) struct x86_reg vp1 = x86_make_reg(file_XMM, 2); GLubyte *fixup, *label; - x86_init_func(&p->func); - /* Push a few regs? 
*/ x86_push(&p->func, countEBP); @@ -641,7 +639,7 @@ void _tnl_generate_sse_emit( GLcontext *ctx ) p.ctx = ctx; p.inputs_safe = 0; /* for now */ - p.outputs_safe = 1; /* for now */ + p.outputs_safe = 0; /* for now */ p.have_sse2 = cpu_has_xmm2; p.identity = x86_make_reg(file_XMM, 6); p.chan0 = x86_make_reg(file_XMM, 7); diff --git a/src/mesa/vf/vf_sse.c b/src/mesa/vf/vf_sse.c index 3ce76e2b042..c3a2166578c 100644 --- a/src/mesa/vf/vf_sse.c +++ b/src/mesa/vf/vf_sse.c @@ -633,7 +633,7 @@ void vf_generate_sse_emit( struct vertex_fetch *vf ) p.vf = vf; p.inputs_safe = 0; /* for now */ - p.outputs_safe = 1; /* for now */ + p.outputs_safe = 0; /* for now */ p.have_sse2 = cpu_has_xmm2; p.identity = x86_make_reg(file_XMM, 6); p.chan0 = x86_make_reg(file_XMM, 7); |