From 89498d01531cd515c769e570bf799c39fbafc8fb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 7 Oct 2009 22:36:43 +0100 Subject: llvmpipe: import experimental softpipe rasterizer code, wip binning code WIP, doesn't build or run. Rasterizer code is based on Nick Capens' devmaster posts and the Larrabee articles, but currently doesn't share either the performance or correctness of either... --- src/gallium/drivers/llvmpipe/Makefile | 2 - src/gallium/drivers/llvmpipe/SConscript | 2 - src/gallium/drivers/llvmpipe/lp_context.c | 26 +- src/gallium/drivers/llvmpipe/lp_context.h | 5 +- src/gallium/drivers/llvmpipe/lp_prim_setup.c | 190 --- src/gallium/drivers/llvmpipe/lp_prim_setup.h | 85 -- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 105 +- src/gallium/drivers/llvmpipe/lp_prim_vbuf.h | 4 +- src/gallium/drivers/llvmpipe/lp_rasterizer.c | 157 +++ src/gallium/drivers/llvmpipe/lp_rasterizer.h | 112 ++ src/gallium/drivers/llvmpipe/lp_setup.c | 1432 +-------------------- src/gallium/drivers/llvmpipe/lp_setup.h | 17 +- src/gallium/drivers/llvmpipe/lp_setup_context.h | 140 ++ src/gallium/drivers/llvmpipe/lp_setup_rasterize.c | 7 + src/gallium/drivers/llvmpipe/lp_setup_tri.c | 755 +++++++++++ src/gallium/drivers/llvmpipe/lp_state_derived.c | 25 +- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 353 ----- src/gallium/drivers/llvmpipe/lp_tile_cache.h | 71 - 18 files changed, 1276 insertions(+), 2212 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_setup.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_setup.h create mode 100644 src/gallium/drivers/llvmpipe/lp_rasterizer.c create mode 100644 src/gallium/drivers/llvmpipe/lp_rasterizer.h create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_context.h create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_rasterize.c create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_tri.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_tile_cache.c delete mode 100644 
src/gallium/drivers/llvmpipe/lp_tile_cache.h (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 21aff1967a1..8f05e5a6fd1 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -32,7 +32,6 @@ C_SOURCES = \ lp_draw_arrays.c \ lp_flush.c \ lp_jit.c \ - lp_prim_setup.c \ lp_prim_vbuf.c \ lp_setup.c \ lp_query.c \ @@ -51,7 +50,6 @@ C_SOURCES = \ lp_tex_sample_c.c \ lp_tex_sample_llvm.c \ lp_texture.c \ - lp_tile_cache.c \ lp_tile_soa.c include ../../Makefile.template diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 13cd465838a..344b2463377 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -45,7 +45,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_draw_arrays.c', 'lp_flush.c', 'lp_jit.c', - 'lp_prim_setup.c', 'lp_prim_vbuf.c', 'lp_setup.c', 'lp_query.c', @@ -64,7 +63,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_tex_sample_c.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', - 'lp_tile_cache.c', 'lp_tile_soa.c', ]) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 202cb8ef439..57e71f3e986 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -31,13 +31,13 @@ */ #include "draw/draw_context.h" +#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" -#include "lp_prim_setup.h" #include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" @@ -264,21 +264,21 @@ llvmpipe_create( struct pipe_screen *screen ) (struct tgsi_sampler **) llvmpipe->tgsi.vert_samplers_list); - llvmpipe->setup = lp_draw_render_stage(llvmpipe); - if (!llvmpipe->setup) - goto fail; - if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - if 
(debug_get_bool_option( "LP_NO_VBUF", FALSE )) { - /* Deprecated path -- vbuf is the intended interface to the draw module: - */ - draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->setup); - } - else { - lp_init_vbuf(llvmpipe); - } + llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); + if (!llvmpipe->vbuf_backend) + goto fail; + + llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend); + if (!llvmpipe->vbuf) + goto fail; + + draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf); + draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend); + + /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 8d5a0d4f1fc..0b77ae58d50 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -124,9 +124,10 @@ struct llvmpipe_context { /** The primitive drawing context */ struct draw_context *draw; - struct draw_stage *setup; + + /** Draw module backend */ + struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - struct llvmpipe_vbuf_render *vbuf_render; boolean dirty_render_cache; diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.c b/src/gallium/drivers/llvmpipe/lp_prim_setup.c deleted file mode 100644 index b14f8fb99d9..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief A draw stage that drives our triangle setup routines from - * within the draw pipeline. One of two ways to drive setup, the - * other being in lp_prim_vbuf.c. - * - * \author Keith Whitwell - * \author Brian Paul - */ - - -#include "lp_context.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "lp_prim_setup.h" -#include "draw/draw_pipe.h" -#include "draw/draw_vertex.h" -#include "util/u_memory.h" - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct setup_context *setup; -}; - - - -/** - * Basically a cast wrapper. 
- */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} - - -typedef const float (*cptrf4)[4]; - -static void -do_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_tri( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data, - (cptrf4)prim->v[2]->data ); -} - -static void -do_line(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_line( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data ); -} - -static void -do_point(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_point( setup->setup, - (cptrf4)prim->v[0]->data ); -} - - - - -static void setup_begin( struct draw_stage *stage ) -{ - struct setup_stage *setup = setup_stage(stage); - - llvmpipe_setup_prepare( setup->setup ); - - stage->point = do_point; - stage->line = do_line; - stage->tri = do_tri; -} - - -static void setup_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->point( stage, header ); -} - -static void setup_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->line( stage, header ); -} - - -static void setup_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->tri( stage, header ); -} - - - -static void setup_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->point = setup_first_point; - stage->line = setup_first_line; - stage->tri = setup_first_tri; -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ -} - - -static void render_destroy( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - llvmpipe_setup_destroy_context(ssetup->setup); - FREE( stage ); -} - - -/** - 
* Create a new primitive setup/render stage. - */ -struct draw_stage *lp_draw_render_stage( struct llvmpipe_context *llvmpipe ) -{ - struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - - sstage->setup = llvmpipe_setup_create_context(llvmpipe); - sstage->stage.draw = llvmpipe->draw; - sstage->stage.point = setup_first_point; - sstage->stage.line = setup_first_line; - sstage->stage.tri = setup_first_tri; - sstage->stage.flush = setup_flush; - sstage->stage.reset_stipple_counter = reset_stipple_counter; - sstage->stage.destroy = render_destroy; - - return (struct draw_stage *)sstage; -} - -struct setup_context * -lp_draw_setup_context( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - return ssetup->setup; -} - -void -lp_draw_flush( struct draw_stage *stage ) -{ - stage->flush( stage, 0 ); -} diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.h b/src/gallium/drivers/llvmpipe/lp_prim_setup.h deleted file mode 100644 index da6cae63751..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.h +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef LP_PRIM_SETUP_H -#define LP_PRIM_SETUP_H - - -/** - * vbuf is a special stage to gather the stream of triangles, lines, points - * together and reconstruct vertex buffers for hardware upload. - * - * First attempt, work in progress. - * - * TODO: - * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. - * - tell vbuf stage how to build hw vertices directly - * - pass vbuf stage a buffer pointer for direct emit to agp/vram. - * - * - * - * Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. 
- */ - - -struct draw_stage; -struct llvmpipe_context; - - -typedef void (*vbuf_draw_func)( struct pipe_context *pipe, - unsigned prim, - const ushort *elements, - unsigned nr_elements, - const void *vertex_buffer, - unsigned nr_vertices ); - - -extern struct draw_stage * -lp_draw_render_stage( struct llvmpipe_context *llvmpipe ); - -extern struct setup_context * -lp_draw_setup_context( struct draw_stage * ); - -extern void -lp_draw_flush( struct draw_stage * ); - - -extern struct draw_stage * -lp_draw_vbuf_stage( struct draw_context *draw_context, - struct pipe_context *pipe, - vbuf_draw_func draw ); - - -#endif /* LP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index c394dcb61d0..e244ac9087c 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -39,7 +39,6 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_prim_vbuf.h" -#include "lp_prim_setup.h" #include "lp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" @@ -59,6 +58,8 @@ struct llvmpipe_vbuf_render { struct vbuf_render base; struct llvmpipe_context *llvmpipe; + struct setup_context *setup; + uint prim; uint vertex_size; uint nr_vertices; @@ -75,6 +76,11 @@ llvmpipe_vbuf_render(struct vbuf_render *vbr) } + + + + + static const struct vertex_info * lp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -105,36 +111,6 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void lp_vbuf_release_vertices(struct vbuf_render *vbr) { -#if 0 - { - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - const struct vertex_info *info = - llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe); - const float *vtx = (const float *) cvbr->vertex_buffer; - uint i, j; - debug_printf("%s (vtx_size = %u, vtx_used = %u)\n", - __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices); - for (i = 0; i < cvbr->nr_vertices; i++) { - for (j = 0; j < info->num_attribs; j++) { - 
uint k; - switch (info->attrib[j].emit) { - case EMIT_4F: k = 4; break; - case EMIT_3F: k = 3; break; - case EMIT_2F: k = 2; break; - case EMIT_1F: k = 1; break; - default: assert(0); - } - debug_printf("Vert %u attr %u: ", i, j); - while (k-- > 0) { - debug_printf("%g ", vtx[0]); - vtx++; - } - debug_printf("\n"); - } - } - } -#endif - /* keep the old allocation for next time */ } @@ -160,12 +136,8 @@ static boolean lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); + struct setup_context *setup_ctx = cvbr->setup; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct setup_context *setup_ctx = lp_draw_setup_context(cvbr->llvmpipe->setup); - llvmpipe_setup_prepare( setup_ctx ); cvbr->llvmpipe->reduced_prim = u_reduced_prim(prim); @@ -193,14 +165,9 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = cvbr->vertex_buffer; + struct setup_context *setup_ctx = cvbr->setup; unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -367,11 +334,6 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) default: assert(0); } - - /* XXX: why are we calling this??? 
If we had to call something, it - * would be a function in lp_setup.c: - */ - lp_draw_flush( setup ); } @@ -384,17 +346,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; + struct setup_context *setup_ctx = cvbr->setup; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = (void *) get_vert(cvbr->vertex_buffer, start, stride); unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -568,40 +525,38 @@ static void lp_vbuf_destroy(struct vbuf_render *vbr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - cvbr->llvmpipe->vbuf_render = NULL; + llvmpipe_setup_destroy_context(cvbr->setup); FREE(cvbr); } /** - * Initialize the post-transform vertex buffer information for the given - * context. + * Create the post-transform vertex handler for the given context. 
*/ -void -lp_init_vbuf(struct llvmpipe_context *lp) +struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *lp) { - assert(lp->draw); + struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); - lp->vbuf_render = CALLOC_STRUCT(llvmpipe_vbuf_render); + assert(lp->draw); - lp->vbuf_render->base.max_indices = LP_MAX_VBUF_INDEXES; - lp->vbuf_render->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->base.get_vertex_info = lp_vbuf_get_vertex_info; - lp->vbuf_render->base.allocate_vertices = lp_vbuf_allocate_vertices; - lp->vbuf_render->base.map_vertices = lp_vbuf_map_vertices; - lp->vbuf_render->base.unmap_vertices = lp_vbuf_unmap_vertices; - lp->vbuf_render->base.set_primitive = lp_vbuf_set_primitive; - lp->vbuf_render->base.draw = lp_vbuf_draw; - lp->vbuf_render->base.draw_arrays = lp_vbuf_draw_arrays; - lp->vbuf_render->base.release_vertices = lp_vbuf_release_vertices; - lp->vbuf_render->base.destroy = lp_vbuf_destroy; + cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; + cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->llvmpipe = lp; + cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info; + cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices; + cvbr->base.map_vertices = lp_vbuf_map_vertices; + cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices; + cvbr->base.set_primitive = lp_vbuf_set_primitive; + cvbr->base.draw = lp_vbuf_draw; + cvbr->base.draw_arrays = lp_vbuf_draw_arrays; + cvbr->base.release_vertices = lp_vbuf_release_vertices; + cvbr->base.destroy = lp_vbuf_destroy; - lp->vbuf = draw_vbuf_stage(lp->draw, &lp->vbuf_render->base); + cvbr->llvmpipe = lp; - draw_set_rasterize_stage(lp->draw, lp->vbuf); + cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); - draw_set_render(lp->draw, &lp->vbuf_render->base); + return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h index 6c4e6063e6d..0676e2f42ac 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h @@ -31,8 +31,8 @@ struct llvmpipe_context; -extern void -lp_init_vbuf(struct llvmpipe_context *llvmpipe); +extern struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe); #endif /* LP_VBUF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_rasterizer.c new file mode 100644 index 00000000000..089ea597292 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rasterizer.c @@ -0,0 +1,157 @@ + +struct lp_rasterizer { + + /* We can choose whatever layout for the internal tile storage we + * prefer: + */ + struct { + unsigned color[TILESIZE][TILESIZE]; + unsigned depth[TILESIZE][TILESIZE]; + char stencil[TILESIZE][TILESIZE]; + } tile; + + + unsigned x; + unsigned y; + + + struct { + struct pipe_surface *color; + struct pipe_surface *zstencil; + unsigned clear_color; + unsigned clear_depth; + char clear_stencil; + } state; +}; + +struct lp_rasterizer *lp_rast_create( void ) +{ + return CALLOC_STRUCT(lp_rasterizer); +} + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil) +{ + pipe_surface_reference(&rast->state.color, color); + pipe_surface_reference(&rast->state.depth, depth); + rast->state.clear_color = util_pack_8888(clear_color); + rast->state.clear_depth = clear_depth * 0xffffffff; + rast->state.clear_stencil = clear_stencil; +} + +/* Begining of each tile: + */ +void lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ) +{ + rast->x = x; + rast->y = y; +} + +void lp_rast_clear_color( struct lp_rasterizer *rast ) +{ + const unsigned clear_color = rast->state.clear_color; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_color; +} + +void lp_rast_clear_depth( struct lp_rasterizer *rast ) +{ 
+ const unsigned clear_depth = rast->state.clear_depth; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_depth; +} + +void lp_rast_clear_stencil( struct lp_rasterizer *rast ) +{ + const unsigned clear_stencil = rast->state.clear_stencil; + + memset(rast->tile.stencil, clear_stencil, sizeof rast->tile.stencil ); +} + +void lp_rast_load_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load colors from surface */ +} + +void lp_rast_load_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load depth (and stencil?) from surface */ +} + +/* Within a tile: + */ +void lp_rast_set_state( struct lp_rasterizer *rast, + const struct lp_rast_state *state ) +{ + rast->shader_state = state; +} + +void lp_rast_triangle( struct lp_rasterizer *rast, + const struct lp_rast_triangle *inputs ) +{ + /* Set up the silly quad coef pointers + */ + for (i = 0; i < 4; i++) { + rast->quads[i].posCoef = inputs->posCoef; + rast->quads[i].coef = inputs->coef; + } + + /* Scan the tile in 4x4 chunks (?) 
and figure out which bits to + * rasterize: + */ + +} + +void lp_rast_shade_tile( struct lp_rasterizer *rast, + const struct lp_rast_shader_inputs *inputs ) +{ + /* Set up the silly quad coef pointers + */ + for (i = 0; i < 4; i++) { + rast->quads[i].posCoef = inputs->posCoef; + rast->quads[i].coef = inputs->coef; + } + + /* Use the existing preference for 8x2 (four quads) shading: + */ + for (i = 0; i < TILESIZE; i += 8) { + for (j = 0; j < TILESIZE; j += 2) { + rast->shader_state.shade( inputs->jc, + rast->x + i, + rast->y + j, + rast->quads, 4 ); + } + } +} + +/* End of tile: + */ +void lp_rast_store_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store colors to surface */ +} + +void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store depth/stencil to surface */ +} + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer *rast ) +{ + FREE(rast); +} + diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.h b/src/gallium/drivers/llvmpipe/lp_rasterizer.h new file mode 100644 index 00000000000..b3ae06a1169 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rasterizer.h @@ -0,0 +1,112 @@ + +/* Initially create and program a single rasterizer directly. Later + * will want multiple of these, one or two per core. At that stage + * will probably pass command buffers into the rasterizers rather than + * individual function calls like this. 
+ */ +struct lp_rasterizer; + +struct lp_rast_state { + /* State: + */ + struct lp_jit_context jc; + + /* Shader itself: + */ +}; + +/* Coefficients necessary to run the shader at a given location: + */ +struct lp_rast_shader_inputs { + + /* Current rasterizer state: + */ + const struct lp_rast_state *state; + + /* Attribute interpolation: + */ + float oneoverarea; + float x1; + float y1; + + struct tgsi_interp_coef position_coef; + struct tgsi_interp_coef *coef; +}; + + +/* Rasterization information for a triangle known to be in this bin, + * plus inputs to run the shader: + */ +struct lp_rast_triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + /* State to run the shader: */ + struct lp_rast_shader_inputs inputs; +}; + + + +struct lp_rasterizer *lp_rast_create( void ); + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil); + +/* Begining of each tile: + */ +void lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ); + +void lp_rast_clear_color( struct lp_rasterizer * ); + +void lp_rast_clear_zstencil( struct lp_rasterizer * ); + +void lp_rast_load_color( struct lp_rasterizer * ); + +void lp_rast_load_zstencil( struct lp_rasterizer * ); + + +/* Within a tile: + */ +void lp_rast_set_state( struct lp_rasterizer *, + const struct lp_rast_state * ); + +void lp_rast_triangle( struct lp_rasterizer *, + const struct lp_rast_triangle * ); + +void lp_rast_shade_tile( struct lp_rasterizer *, + const struct lp_rast_shader_inputs * ); + +/* End of tile: + */ +void lp_rast_store_color( struct 
lp_rasterizer * ); + +void lp_rast_store_zstencil( struct lp_rasterizer * ); + + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer * ); + diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 60107214df5..8c67524506e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -26,15 +26,15 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines, triangles) + * \brief Primitive rasterization/rendering (points, lines) * * \author Keith Whitwell * \author Brian Paul */ #include "lp_context.h" -#include "lp_prim_setup.h" #include "lp_quad.h" +#include "lp_quad_pipe.h" #include "lp_setup.h" #include "lp_state.h" #include "draw/draw_context.h" @@ -44,1397 +44,49 @@ #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "lp_bld_debug.h" -#include "lp_tile_cache.h" -#include "lp_tile_soa.h" #define DEBUG_VERTS 0 -#define DEBUG_FRAGS 0 -/** - * Triangle edge info - */ -struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ - float dxdy; /**< dx/dy */ - float sx, sy; /**< first sample point coord */ - int lines; /**< number of lines on this edge */ -}; - - -#define MAX_QUADS 16 - - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_context { - struct llvmpipe_context *llvmpipe; - - /* Vertices are just an array of floats making up each attribute in - * turn. Currently fixed at 4 floats, but should change in time. - * Codegen will help cope with this. 
- */ - const float (*vmax)[4]; - const float (*vmid)[4]; - const float (*vmin)[4]; - const float (*vprovoke)[4]; - - struct edge ebot; - struct edge etop; - struct edge emaj; - - float oneoverarea; - int facing; - - struct quad_header quad[MAX_QUADS]; - struct quad_header *quad_ptrs[MAX_QUADS]; - unsigned count; - - struct quad_interp_coef coef; - - struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; - int y; - } span; - -#if DEBUG_FRAGS - uint numFragsEmitted; /**< per primitive */ - uint numFragsWritten; /**< per primitive */ -#endif - - unsigned winding; /* which winding to cull */ -}; - - - -/** - * Execute fragment shader for the four fragments in the quad. - */ -static void -shade_quads(struct llvmpipe_context *llvmpipe, - struct quad_header *quads[], - unsigned nr) -{ - struct lp_fragment_shader *fs = llvmpipe->fs; - struct quad_header *quad = quads[0]; - const unsigned x = quad->input.x0; - const unsigned y = quad->input.y0; - uint8_t *tile; - uint8_t *color; - void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; - unsigned chan_index; - unsigned q; - - assert(fs->current); - if(!fs->current) - return; - - /* Sanity checks */ - assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH); - assert(x % TILE_VECTOR_WIDTH == 0); - assert(y % TILE_VECTOR_HEIGHT == 0); - for (q = 0; q < nr; ++q) { - assert(quads[q]->input.x0 == x + q*2); - assert(quads[q]->input.y0 == y); - } - - /* mask */ - for (q = 0; q < 4; ++q) - for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? 
~0 : 0; - - /* color buffer */ - if(llvmpipe->framebuffer.nr_cbufs >= 1 && - llvmpipe->framebuffer.cbufs[0]) { - tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y); - color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0); - } - else - color = NULL; - - /* depth buffer */ - if(llvmpipe->zsbuf_map) { - assert((x % 2) == 0); - assert((y % 2) == 0); - depth = llvmpipe->zsbuf_map + - y*llvmpipe->zsbuf_transfer->stride + - 2*x*llvmpipe->zsbuf_transfer->block.size; - } - else - depth = NULL; - - /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ - assert(lp_check_alignment(mask, 16)); - - assert(lp_check_alignment(depth, 16)); - assert(lp_check_alignment(color, 16)); - assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); - - /* run shader */ - fs->current->jit_function( &llvmpipe->jit_context, - x, y, - quad->coef->a0, - quad->coef->dadx, - quad->coef->dady, - &mask[0][0], - color, - depth); -} - - - - -/** - * Do triangle cull test using tri determinant (sign indicates orientation) - * \return true if triangle is to be culled. - */ -static INLINE boolean -cull_tri(const struct setup_context *setup, float det) -{ - if (det != 0) { - /* if (det < 0 then Z points toward camera and triangle is - * counter-clockwise winding. - */ - unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; - - if ((winding & setup->winding) == 0) - return FALSE; - } - - /* Culled: - */ - return TRUE; -} - - - -/** - * Clip setup->quad against the scissor/surface bounds. 
- */ -static INLINE void -quad_clip( struct setup_context *setup, struct quad_header *quad ) -{ - const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (quad->input.x0 >= maxx || - quad->input.y0 >= maxy || - quad->input.x0 + 1 < minx || - quad->input.y0 + 1 < miny) { - /* totally clipped */ - quad->inout.mask = 0x0; - return; - } - if (quad->input.x0 < minx) - quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (quad->input.y0 < miny) - quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (quad->input.x0 == maxx - 1) - quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (quad->input.y0 == maxy - 1) - quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} - - - -/** - * Given an X or Y coordinate, return the block/quad coordinate that it - * belongs to. - */ -static INLINE int block( int x ) -{ - return x & ~(2-1); -} - -static INLINE int block_x( int x ) -{ - return x & ~(TILE_VECTOR_WIDTH - 1); -} - - -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) -{ - quad_clip( setup, quad ); - - if (quad->inout.mask) { - struct llvmpipe_context *lp = setup->llvmpipe; - -#if 1 - /* XXX: The blender expects 4 quads. This is far from efficient, but - * until we codegenerate single-quad variants of the fragment pipeline - * we need this hack. 
*/ - const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - struct quad_header quads[nr_quads]; - struct quad_header *quad_ptrs[nr_quads]; - int x0 = block_x(quad->input.x0); - unsigned i; - - for(i = 0; i < nr_quads; ++i) { - int x = x0 + 2*i; - if(x == quad->input.x0) - memcpy(&quads[i], quad, sizeof quads[i]); - else { - memset(&quads[i], 0, sizeof quads[i]); - quads[i].input.x0 = x; - quads[i].input.y0 = quad->input.y0; - quads[i].coef = quad->coef; - } - quad_ptrs[i] = &quads[i]; - } - - shade_quads( lp, quad_ptrs, nr_quads ); -#else - shade_quads( lp, &quad, 1 ); -#endif - } -} - - -/** - * Render a horizontal span of quads - */ -static void flush_spans( struct setup_context *setup ) -{ - const int step = TILE_VECTOR_WIDTH; - const int xleft0 = setup->span.left[0]; - const int xleft1 = setup->span.left[1]; - const int xright0 = setup->span.right[0]; - const int xright1 = setup->span.right[1]; - - - int minleft = block_x(MIN2(xleft0, xleft1)); - int maxright = MAX2(xright0, xright1); - int x; - - for (x = minleft; x < maxright; x += step) { - unsigned skip_left0 = CLAMP(xleft0 - x, 0, step); - unsigned skip_left1 = CLAMP(xleft1 - x, 0, step); - unsigned skip_right0 = CLAMP(x + step - xright0, 0, step); - unsigned skip_right1 = CLAMP(x + step - xright1, 0, step); - unsigned lx = x; - const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - unsigned q = 0; - - unsigned skipmask_left0 = (1U << skip_left0) - 1U; - unsigned skipmask_left1 = (1U << skip_left1) - 1U; - - /* These calculations fail when step == 32 and skip_right == 0. 
- */ - unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0); - unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1); - - unsigned mask0 = ~skipmask_left0 & ~skipmask_right0; - unsigned mask1 = ~skipmask_left1 & ~skipmask_right1; - - if (mask0 | mask1) { - for(q = 0; q < nr_quads; ++q) { - unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); - setup->quad[q].input.x0 = lx; - setup->quad[q].input.y0 = setup->span.y; - setup->quad[q].inout.mask = quadmask; - setup->quad_ptrs[q] = &setup->quad[q]; - mask0 >>= 2; - mask1 >>= 2; - lx += 2; - } - assert(!(mask0 | mask1)); - - shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads ); - } - } - - - setup->span.y = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - setup->span.left[0] = 1000000; /* greater than right[0] */ - setup->span.left[1] = 1000000; /* greater than right[1] */ -} - - -#if DEBUG_VERTS -static void print_vertex(const struct setup_context *setup, - const float (*v)[4]) -{ - int i; - debug_printf(" Vertex: (%p)\n", v); - for (i = 0; i < setup->quad[0].nr_attrs; i++) { - debug_printf(" %d: %f %f %f %f\n", i, - v[i][0], v[i][1], v[i][2], v[i][3]); - if (util_is_inf_or_nan(v[i][0])) { - debug_printf(" NaN!\n"); - } - } -} -#endif - -/** - * Sort the vertices from top to bottom order, setting up the triangle - * edge fields (ebot, emaj, etop). 
- * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise - */ -static boolean setup_sort_vertices( struct setup_context *setup, - float det, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - setup->vprovoke = v2; - - /* determine bottom to top order of vertices */ - { - float y0 = v0[0][1]; - float y1 = v1[0][1]; - float y2 = v2[0][1]; - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup->vmin = v0; - setup->vmid = v1; - setup->vmax = v2; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup->vmin = v2; - setup->vmid = v0; - setup->vmax = v1; - } - else { - /* y0<=y2<=y1 */ - setup->vmin = v0; - setup->vmid = v2; - setup->vmax = v1; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup->vmin = v1; - setup->vmid = v0; - setup->vmax = v2; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup->vmin = v2; - setup->vmid = v1; - setup->vmax = v0; - } - else { - /* y1<=y2<=y0 */ - setup->vmin = v1; - setup->vmid = v2; - setup->vmax = v0; - } - } - } - - setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; - setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; - setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; - setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; - setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; - setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; - - /* - * Compute triangle's area. Use 1/area to compute partial - * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. 
- */ - { - const float area = (setup->emaj.dx * setup->ebot.dy - - setup->ebot.dx * setup->emaj.dy); - - setup->oneoverarea = 1.0f / area; - - /* - debug_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup->oneoverarea, area, det ); - */ - if (util_is_inf_or_nan(setup->oneoverarea)) - return FALSE; - } - - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup->facing = - ((det > 0.0) ^ - (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW)); - - return TRUE; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static void tri_pos_coeff( struct setup_context *setup, - uint vertSlot, unsigned i) -{ - float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; - float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - setup->coef.dadx[0][i] = dadx; - setup->coef.dady[0][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. 
- */ - setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_pos_coeff( struct setup_context *setup, - uint vertSlot, unsigned i) -{ - setup->coef.dadx[0][i] = 0; - setup->coef.dady[0][i] = 0; - - /* need provoking vertex info! - */ - setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i]; -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - setup->coef.dadx[1 + attrib][i] = 0; - setup->coef.dady[1 + attrib][i] = 0; - - /* need provoking vertex info! - */ - setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i]; - } -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. 
- */ -static void tri_linear_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; - float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ - } -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. 
- */ -static void tri_persp_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - /* premultiply by 1/w (v[0][3] is always W): - */ - float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; - float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; - float botda = mida - mina; - float majda = maxa - mina; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - /* - debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, - setup->vmin[vertSlot][i], - setup->vmid[vertSlot][i], - setup->vmax[vertSlot][i] - ); - */ - assert(i <= 3); - - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (mina - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - } -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. - * Z and W are copied from posCoef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
- */ -static void -setup_fragcoord_coeff(struct setup_context *setup, uint slot) -{ - /*X*/ - setup->coef.a0[1 + slot][0] = 0; - setup->coef.dadx[1 + slot][0] = 1.0; - setup->coef.dady[1 + slot][0] = 0.0; - /*Y*/ - setup->coef.a0[1 + slot][1] = 0.0; - setup->coef.dadx[1 + slot][1] = 0.0; - setup->coef.dady[1 + slot][1] = 1.0; - /*Z*/ - setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2]; - setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2]; - setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2]; - /*W*/ - setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3]; - setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3]; - setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3]; -} - - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. - */ -static void setup_tri_coefficients( struct setup_context *setup ) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - - /* z and w are done by linear interpolation: - */ - tri_pos_coeff(setup, 0, 2); - tri_pos_coeff(setup, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_LINEAR: - tri_linear_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - tri_persp_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + 
fragSlot][0] = 0.0; - } - } -} - - - -static void setup_tri_edges( struct setup_context *setup ) -{ - float vmin_x = setup->vmin[0][0] + 0.5f; - float vmid_x = setup->vmid[0][0] + 0.5f; - - float vmin_y = setup->vmin[0][1] - 0.5f; - float vmid_y = setup->vmid[0][1] - 0.5f; - float vmax_y = setup->vmax[0][1] - 0.5f; - - setup->emaj.sy = ceilf(vmin_y); - setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); - setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; - setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; - - setup->etop.sy = ceilf(vmid_y); - setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); - setup->etop.dxdy = setup->etop.dx / setup->etop.dy; - setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; - - setup->ebot.sy = ceilf(vmin_y); - setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); - setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; - setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; -} - - -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. - */ -static void subtriangle( struct setup_context *setup, - struct edge *eleft, - struct edge *eright, - unsigned lines ) -{ - const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - assert((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - if (start_y < miny) - start_y = miny; - - finish_y = sy + lines; - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. 
luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. - */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup->span.y) { - flush_spans(setup); - setup->span.y = block(_y); - } - - setup->span.left[_y&1] = left; - setup->span.right[_y&1] = right; - } - } - - - /* save the values so that emaj can be restarted: - */ - eleft->sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; -} - - -/** - * Recalculate prim's determinant. This is needed as we don't have - * get this information through the vbuf_render interface & we must - * calculate it here. - */ -static float -calc_det( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - return ex * fy - ey * fx; -} - - -/** - * Do setup for triangle rasterization, then render the triangle. 
- */ -void llvmpipe_setup_tri( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - float det; - -#if DEBUG_VERTS - debug_printf("Setup triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); -#endif - - if (setup->llvmpipe->no_rast) - return; - - det = calc_det(v0, v1, v2); - /* - debug_printf("%s\n", __FUNCTION__ ); - */ - -#if DEBUG_FRAGS - setup->numFragsEmitted = 0; - setup->numFragsWritten = 0; -#endif - - if (cull_tri( setup, det )) - return; - - if (!setup_sort_vertices( setup, det, v0, v1, v2 )) - return; - setup_tri_coefficients( setup ); - setup_tri_edges( setup ); - - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES); - - setup->span.y = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ - - /* init_constant_attribs( setup ); */ - - if (setup->oneoverarea < 0.0) { - /* emaj on left: - */ - subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); - subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); - } - else { - /* emaj on right: - */ - subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); - subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); - } - - flush_spans( setup ); - -#if DEBUG_FRAGS - printf("Tri: %u frags emitted, %u written\n", - setup->numFragsEmitted, - setup->numFragsWritten); -#endif -} - - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. 
- */ -static void -linear_pos_coeff(struct setup_context *setup, - uint vertSlot, uint i) -{ - const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[0][i] = dadx; - setup->coef.dady[0][i] = dady; - setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. - */ -static void -line_linear_coeff(struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - } -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a line. 
- */ -static void -line_persp_coeff(struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; - const float da = a1 - a0; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - } -} - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmax are initialized. - */ -static INLINE boolean -setup_line_coefficients(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - float area; - - /* use setup->vmin, vmax to point to vertices */ - if (llvmpipe->rasterizer->flatshade_first) - setup->vprovoke = v0; - else - setup->vprovoke = v1; - setup->vmin = v0; - setup->vmax = v1; - - setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; - setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; - - /* NOTE: this is not really area but something proportional to it */ - area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; - if (area == 0.0f || util_is_inf_or_nan(area)) - return FALSE; - setup->oneoverarea = 1.0f / area; - - /* z and w are done by linear interpolation: - */ - linear_pos_coeff(setup, 0, 2); - linear_pos_coeff(setup, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; 
fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_LINEAR: - line_linear_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - line_persp_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } - } - return TRUE; -} - - -/** - * Plot a pixel in a line segment. +/* Stubs for lines & points for now: */ -static INLINE void -plot(struct setup_context *setup, int x, int y) +void +llvmpipe_setup_point(struct setup_context *setup, + const float (*v0)[4]) { - const int iy = y & 1; - const int ix = x & 1; - const int quadX = x - ix; - const int quadY = y - iy; - const int mask = (1 << ix) << (2 * iy); - - if (quadX != setup->quad[0].input.x0 || - quadY != setup->quad[0].input.y0) - { - /* flush prev quad, start new quad */ - - if (setup->quad[0].input.x0 != -1) - clip_emit_quad( setup, &setup->quad[0] ); - - setup->quad[0].input.x0 = quadX; - setup->quad[0].input.y0 = quadY; - setup->quad[0].inout.mask = 0x0; - } - - setup->quad[0].inout.mask |= mask; } - -/** - * Do setup for line rasterization, then render the line. - * Single-pixel width, no stipple, etc. We rely on the 'draw' module - * to handle stippling and wide lines. 
- */ void llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) -{ - int x0 = (int) v0[0][0]; - int x1 = (int) v1[0][0]; - int y0 = (int) v0[0][1]; - int y1 = (int) v1[0][1]; - int dx = x1 - x0; - int dy = y1 - y0; - int xstep, ystep; - -#if DEBUG_VERTS - debug_printf("Setup line:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); -#endif - - if (setup->llvmpipe->no_rast) - return; - - if (dx == 0 && dy == 0) - return; - - if (!setup_line_coefficients(setup, v0, v1)) - return; - - assert(v0[0][0] < 1.0e9); - assert(v0[0][1] < 1.0e9); - assert(v1[0][0] < 1.0e9); - assert(v1[0][1] < 1.0e9); - - if (dx < 0) { - dx = -dx; /* make positive */ - xstep = -1; - } - else { - xstep = 1; - } - - if (dy < 0) { - dy = -dy; /* make positive */ - ystep = -1; - } - else { - ystep = 1; - } - - assert(dx >= 0); - assert(dy >= 0); - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES); - - setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; - setup->quad[0].inout.mask = 0x0; - - /* XXX temporary: set coverage to 1.0 so the line appears - * if AA mode happens to be enabled. 
- */ - setup->quad[0].input.coverage[0] = - setup->quad[0].input.coverage[1] = - setup->quad[0].input.coverage[2] = - setup->quad[0].input.coverage[3] = 1.0; - - if (dx > dy) { - /*** X-major line ***/ - int i; - const int errorInc = dy + dy; - int error = errorInc - dx; - const int errorDec = error - dx; - - for (i = 0; i < dx; i++) { - plot(setup, x0, y0); - - x0 += xstep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - y0 += ystep; - } - } - } - else { - /*** Y-major line ***/ - int i; - const int errorInc = dx + dx; - int error = errorInc - dy; - const int errorDec = error - dy; - - for (i = 0; i < dy; i++) { - plot(setup, x0, y0); - - y0 += ystep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - x0 += xstep; - } - } - } - - /* draw final quad */ - if (setup->quad[0].inout.mask) { - clip_emit_quad( setup, &setup->quad[0] ); - } -} - - -static void -point_persp_coeff(struct setup_context *setup, - const float (*vert)[4], - unsigned attrib, - uint vertSlot) + const float (*v0)[4], + const float (*v1)[4]) { - unsigned i; - for(i = 0; i < NUM_CHANNELS; ++i) { - setup->coef.dadx[1 + attrib][i] = 0.0F; - setup->coef.dady[1 + attrib][i] = 0.0F; - setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3]; - } } -/** - * Do setup for point rasterization, then render the point. - * Round or square points... - * XXX could optimize a lot for 1-pixel points. +/* Called after statechange, before emitting primitives. If binning + * is active, this function should store relevant state in the binning + * context. + * + * That includes: + * - current fragment shader function + * - bound constant buffer contents + * - bound textures + * - blend color + * - etc. + * + * Basically everything needed at some point in the future to + * rasterize triangles for the current state. + * + * Additionally this will set up the state needed for the rasterizer + * to process and bin incoming triangles. 
That would include such + * things as: + * - cull mode + * - ??? + * - etc. + * */ -void -llvmpipe_setup_point( struct setup_context *setup, - const float (*v0)[4] ) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const int sizeAttr = setup->llvmpipe->psize_slot; - const float size - = sizeAttr > 0 ? v0[sizeAttr][0] - : setup->llvmpipe->rasterizer->point_size; - const float halfSize = 0.5F * size; - const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth; - const float x = v0[0][0]; /* Note: data[0] is always position */ - const float y = v0[0][1]; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - -#if DEBUG_VERTS - debug_printf("Setup point:\n"); - print_vertex(setup, v0); -#endif - - if (llvmpipe->no_rast) - return; - - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS); - - /* For points, all interpolants are constant-valued. - * However, for point sprites, we'll need to setup texcoords appropriately. - * XXX: which coefficients are the texcoords??? - * We may do point sprites as textured quads... - * - * KW: We don't know which coefficients are texcoords - ultimately - * the choice of what interpolation mode to use for each attribute - * should be determined by the fragment program, using - * per-attribute declaration statements that include interpolation - * mode as a parameter. So either the fragment program will have - * to be adjusted for pointsprite vs normal point behaviour, or - * otherwise a special interpolation mode will have to be defined - * which matches the required behaviour for point sprites. But - - * the latter is not a feature of normal hardware, and as such - * probably should be ruled out on that basis. 
- */ - setup->vprovoke = v0; - - /* setup Z, W */ - const_pos_coeff(setup, 0, 2); - const_pos_coeff(setup, 0, 3); - - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - /* fall-through */ - case INTERP_LINEAR: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } - } - - - if (halfSize <= 0.5 && !round) { - /* special case for 1-pixel points */ - const int ix = ((int) x) & 1; - const int iy = ((int) y) & 1; - setup->quad[0].input.x0 = (int) x - ix; - setup->quad[0].input.y0 = (int) y - iy; - setup->quad[0].inout.mask = (1 << ix) << (2 * iy); - clip_emit_quad( setup, &setup->quad[0] ); - } - else { - if (round) { - /* rounded points */ - const int ixmin = block((int) (x - halfSize)); - const int ixmax = block((int) (x + halfSize)); - const int iymin = block((int) (y - halfSize)); - const int iymax = block((int) (y + halfSize)); - const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ - const float rmax = halfSize + 0.7071F; - const float rmin2 = MAX2(0.0F, rmin * rmin); - const float rmax2 = rmax * rmax; - const float cscale = 1.0F / (rmax2 - rmin2); - int ix, iy; - - for (iy = iymin; iy <= iymax; iy += 2) { - for (ix = ixmin; ix <= ixmax; ix += 2) { - float dx, dy, dist2, cover; - - setup->quad[0].inout.mask = 0x0; - - dx = (ix + 0.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - 
setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_TOP_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_TOP_RIGHT; - } - - dx = (ix + 0.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT; - } - - if (setup->quad[0].inout.mask) { - setup->quad[0].input.x0 = ix; - setup->quad[0].input.y0 = iy; - clip_emit_quad( setup, &setup->quad[0] ); - } - } - } - } - else { - /* square points */ - const int xmin = (int) (x + 0.75 - halfSize); - const int ymin = (int) (y + 0.25 - halfSize); - const int xmax = xmin + (int) size; - const int ymax = ymin + (int) size; - /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ - const int ixmin = block(xmin); - const int ixmax = block(xmax - 1); - const int iymin = block(ymin); - const int iymax = block(ymax - 1); - int ix, iy; - - /* - debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); - */ - for (iy = iymin; iy <= iymax; iy += 2) { - uint rowMask = 0xf; - if (iy < ymin) { - /* above the top edge */ - rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - } - if (iy + 1 >= ymax) { - /* below the bottom edge */ - rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); - } - - for (ix = ixmin; ix <= ixmax; ix += 2) { - uint mask = rowMask; - - if (ix < xmin) { - /* fragment is past left edge of point, 
turn off left bits */ - mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - } - if (ix + 1 >= xmax) { - /* past the right edge */ - mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - } - - setup->quad[0].inout.mask = mask; - setup->quad[0].input.x0 = ix; - setup->quad[0].input.y0 = iy; - clip_emit_quad( setup, &setup->quad[0] ); - } - } - } - } -} - -void llvmpipe_setup_prepare( struct setup_context *setup ) +void setup_prepare( struct setup_context *setup ) { struct llvmpipe_context *lp = setup->llvmpipe; @@ -1442,6 +94,8 @@ void llvmpipe_setup_prepare( struct setup_context *setup ) llvmpipe_update_derived(lp); } + lp->quad.first->begin( lp->quad.first ); + if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { @@ -1452,38 +106,28 @@ void llvmpipe_setup_prepare( struct setup_context *setup ) /* 'draw' will do culling */ setup->winding = PIPE_WINDING_NONE; } + + setup_prepare_tri( setup->llvmpipe ); } -void llvmpipe_setup_destroy_context( struct setup_context *setup ) +void setup_destroy_context( struct setup_context *setup ) { - align_free( setup ); + FREE( setup ); } /** * Create a new primitive setup/render stage. 
*/ -struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ) +struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ) { - struct setup_context *setup; + struct setup_context *setup = CALLOC_STRUCT(setup_context); unsigned i; - setup = align_malloc(sizeof(struct setup_context), 16); - if (!setup) - return NULL; - - memset(setup, 0, sizeof *setup); setup->llvmpipe = llvmpipe; - for (i = 0; i < MAX_QUADS; i++) { - setup->quad[i].coef = &setup->coef; - } - - setup->span.left[0] = 1000000; /* greater than right[0] */ - setup->span.left[1] = 1000000; /* greater than right[1] */ - return setup; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 89c43da0460..05aaaf83b8e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -30,11 +30,8 @@ struct setup_context; struct llvmpipe_context; -void -llvmpipe_setup_tri( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ); +/* Note, not using setup_context currently + */ void llvmpipe_setup_line(struct setup_context *setup, @@ -46,8 +43,12 @@ llvmpipe_setup_point( struct setup_context *setup, const float (*v0)[4] ); -struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ); -void llvmpipe_setup_prepare( struct setup_context *setup ); -void llvmpipe_setup_destroy_context( struct setup_context *setup ); +struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ); + +void setup_prepare( struct setup_context *setup ); + +void setup_destroy_context( struct setup_context *setup ); + +void setup_prepare_tri( struct llvmpipe_context *llvmpipe ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h new file mode 100644 index 00000000000..848705e0991 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -0,0 
+1,140 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef LP_SETUP_CONTEXT_H +#define LP_SETUP_CONTEXT_H + +struct clear_tile { + boolean do_color; + boolean do_depth_stencil; + unsigned rgba; + unsigned depth_stencil; +}; + +struct load_tile { + boolean do_color; + boolean do_depth_stencil; +}; + +/* Shade tile points directly at this: + */ +struct shader_inputs { + /* Some way of updating rasterizer state: + */ + /* ??? 
*/ + + /* Attribute interpolation: + */ + float oneoverarea; + float x1; + float y1; + + struct tgsi_interp_coef position_coef; + struct tgsi_interp_coef *coef; +}; + +/* Shade triangle points at this: + */ +struct shade_triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + struct shader_inputs inputs; +}; + +struct bin_cmd { + enum { + CMD_END = 0, + CMD_CLEAR, + CMD_LOAD_TILE, + CMD_SHADE_TILE, + CMD_SHADE_TRIANGLE, + } cmd; + + union { + struct triangle *tri; + struct clear *clear; + } ptr; +}; + +struct cmd_block { + struct bin_cmd cmds[128]; + unsigned count; + struct cmd_block *next; +}; + +/* Triangles + */ +struct data_block { + ubyte data[4096 - sizeof(unsigned) - sizeof(struct cmd_block *)]; + unsigned count; + struct data_block *next; +}; + +/* Need to store the state at the time the triangle was drawn, at + * least as it is needed during rasterization. That would include at + * minimum the constant values referred to by the fragment shader, + * blend state, etc. Much of this is code-generated into the shader + * in llvmpipe -- may be easier to do this work there. 
+ */ +struct state_block { +}; + + +/** + * Basically all the data from a binner scene: + */ +struct binned_scene { + struct llvmpipe_context *llvmpipe; + + struct cmd_block *bin[MAX_HEIGHT / BIN_SIZE][MAX_WIDTH / BIN_SIZE]; + struct data_block *data; +}; + +static INLINE struct triangle *get_triangle( struct setup_context *setup ) +{ + if (setup->triangles->count == TRIANGLE_BLOCK_COUNT) + return setup_triangle_from_new_block( setup ); + + return &setup->triangles[setup->triangles->count++]; +} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c new file mode 100644 index 00000000000..5b4faf489b8 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c @@ -0,0 +1,7 @@ + +void +rasterize( struct llvmpipe_context *llvmpipe, + struct binned_scene *scene ) +{ + +} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c new file mode 100644 index 00000000000..a09e0fa643e --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -0,0 +1,755 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Recursive rasterization for triangles + */ + +#include "lp_context.h" +#include "lp_quad.h" +#include "lp_quad_pipe.h" +#include "lp_setup.h" +#include "lp_state.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#define BLOCKSIZE 4 + +struct triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + /* Attribute interpolation: + */ + float oneoverarea; + float x1; + float y1; + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef position_coef; + + /* A run of pre-initialized quads: + */ + struct llvmpipe_context *llvmpipe; + struct quad_header quad[4]; +}; + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 
+ */ +static void constant_coef( struct tgsi_interp_coef *coef, + const float (*v3)[4], + unsigned vert_attr, + unsigned i ) +{ + coef->a0[i] = v3[vert_attr][i]; + coef->dadx[i] = 0; + coef->dady[i] = 0; +} + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void linear_coef( struct triangle *tri, + struct tgsi_interp_coef *coef, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up losing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment.
+ */ +static void perspective_coef( struct triangle *tri, + struct tgsi_interp_coef *coef, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coef(struct triangle *tri, unsigned slot) +{ + /*X*/ + tri->coef[slot].a0[0] = 0.0; + tri->coef[slot].dadx[0] = 1.0; + tri->coef[slot].dady[0] = 0.0; + /*Y*/ + tri->coef[slot].a0[1] = 0.0; + tri->coef[slot].dadx[1] = 0.0; + tri->coef[slot].dady[1] = 1.0; + /*Z*/ + tri->coef[slot].a0[2] = tri->position_coef.a0[2]; + tri->coef[slot].dadx[2] = tri->position_coef.dadx[2]; + tri->coef[slot].dady[2] = tri->position_coef.dady[2]; + /*W*/ + tri->coef[slot].a0[3] = tri->position_coef.a0[3]; + tri->coef[slot].dadx[3] = tri->position_coef.dadx[3]; + tri->coef[slot].dady[3] = tri->position_coef.dady[3]; +} + + + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. 
+ */ +static void setup_tri_coefficients( struct llvmpipe_context *llvmpipe, + struct triangle *tri, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontface ) +{ + const struct lp_fragment_shader *fs = llvmpipe->fs; + const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); + unsigned input; + + /* z and w are done by linear interpolation: + */ + linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 2); + linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (input = 0; input < fs->info.num_inputs; input++) { + unsigned vert_attr = vinfo->attrib[input].src_index; + unsigned i; + + switch (vinfo->attrib[input].interp_mode) { + case INTERP_CONSTANT: + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(&tri->coef[input], v3, vert_attr, i); + break; + + case INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + linear_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + break; + + case INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + perspective_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + break; + + case INTERP_POS: + setup_fragcoord_coef(tri, input); + break; + + default: + assert(0); + } + + if (fs->info.input_semantic_name[input] == TGSI_SEMANTIC_FACE) { + tri->coef[input].a0[0] = 1.0f - frontface; + tri->coef[input].dadx[0] = 0.0; + tri->coef[input].dady[0] = 0.0; + } + } +} + + + +/* XXX: do this by add/subtracting a large floating point number: + */ +static inline float subpixel_snap( float a ) +{ + int i = a * 16; + return (float)i * (1.0/16); +} + + +/* Convert 8x8 block into four runs of quads and render each in turn. 
+ */ +#if (BLOCKSIZE == 8) +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int i; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + tri->quad[2].input.x0 = x + 4; + tri->quad[3].input.x0 = x + 6; + + for (i = 0; i < 4; i++, y += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + tri->quad[2].inout.mask = 0xf; + tri->quad[3].inout.mask = 0xf; + + tri->quad[0].input.y0 = y; + tri->quad[1].input.y0 = y; + tri->quad[2].input.y0 = y; + tri->quad[3].input.y0 = y; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + ptrs[2] = &tri->quad[2]; + ptrs[3] = &tri->quad[3]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); + } +} +#elif (BLOCKSIZE == 4) +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int iy; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + + for (iy = 0; iy < 4; iy += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + + tri->quad[0].input.y0 = y + iy; + tri->quad[1].input.y0 = y + iy; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); + } +} +#else +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int iy; + + tri->quad[0].input.x0 = x; + tri->quad[0].input.y0 = y; + tri->quad[0].inout.mask = 0xf; + + ptrs[0] = &tri->quad[0]; + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 1 ); +} +#endif + + +static void +do_quad( struct triangle *tri, + int x, int y, + float c1, float c2, float c3 ) +{ + struct quad_header *quad = &tri->quad[0]; + + float xstep1 = -tri->dy12; + float xstep2 = -tri->dy23; + float xstep3 = -tri->dy31; + + float ystep1 = tri->dx12; + float ystep2 = tri->dx23; + float ystep3 = tri->dx31; + + 
quad->input.x0 = x; + quad->input.y0 = y; + quad->inout.mask = 0; + + if (c1 > 0 && + c2 > 0 && + c3 > 0) + quad->inout.mask |= 1; + + if (c1 + xstep1 > 0 && + c2 + xstep2 > 0 && + c3 + xstep3 > 0) + quad->inout.mask |= 2; + + if (c1 + ystep1 > 0 && + c2 + ystep2 > 0 && + c3 + ystep3 > 0) + quad->inout.mask |= 4; + + if (c1 + ystep1 + xstep1 > 0 && + c2 + ystep2 + xstep2 > 0 && + c3 + ystep3 + xstep3 > 0) + quad->inout.mask |= 8; + + if (quad->inout.mask) + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, &quad, 1 ); +} + +/* Evaluate each pixel in a block, generate a mask and possibly render + * the quad: + */ +static void +do_block( struct triangle *tri, + int x, int y, + float c1, + float c2, + float c3 ) +{ + const int step = 2; + + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; + + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + + int ix, iy; + + for (iy = 0; iy < BLOCKSIZE; iy += 2) { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (ix = 0; ix < BLOCKSIZE; ix += 2) { + + do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); + + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } +} + + + + +/* to avoid having to allocate power-of-four, square render targets, + * end up having a specialized version of the above that runs only at + * the topmost level. 
+ * + * at the topmost level there may be an arbitrary number of steps on + * either dimension, so this loop needs to be either separately + * code-generated and unrolled for each render target size, or kept as + * generic looping code: + */ + +#define MIN3(a,b,c) MIN2(MIN2(a,b),c) +#define MAX3(a,b,c) MAX2(MAX2(a,b),c) + +static void +do_triangle_ccw(struct llvmpipe_context *llvmpipe, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontfacing ) +{ + const int rt_width = llvmpipe->framebuffer.cbufs[0]->width; + const int rt_height = llvmpipe->framebuffer.cbufs[0]->height; + + const float y1 = subpixel_snap(v1[0][1]); + const float y2 = subpixel_snap(v2[0][1]); + const float y3 = subpixel_snap(v3[0][1]); + + const float x1 = subpixel_snap(v1[0][0]); + const float x2 = subpixel_snap(v2[0][0]); + const float x3 = subpixel_snap(v3[0][0]); + + struct triangle tri; + float area; + float c1, c2, c3; + int i; + int minx, maxx, miny, maxy; + + tri.llvmpipe = llvmpipe; + + + tri.dx12 = x1 - x2; + tri.dx23 = x2 - x3; + tri.dx31 = x3 - x1; + + tri.dy12 = y1 - y2; + tri.dy23 = y2 - y3; + tri.dy31 = y3 - y1; + + area = (tri.dx12 * tri.dy31 - + tri.dx31 * tri.dy12); + + /* Cull non-ccw and zero-sized triangles. + */ + if (area <= 0 || util_is_inf_or_nan(area)) + return; + + // Bounding rectangle + minx = util_iround(MIN3(x1, x2, x3) - .5); + maxx = util_iround(MAX3(x1, x2, x3) + .5); + miny = util_iround(MIN3(y1, y2, y3) - .5); + maxy = util_iround(MAX3(y1, y2, y3) + .5); + + /* Clamp to framebuffer (or tile) dimensions: + */ + miny = MAX2(0, miny); + minx = MAX2(0, minx); + maxy = MIN2(rt_height, maxy); + maxx = MIN2(rt_width, maxx); + + if (miny == maxy || minx == maxx) + return; + + /* The only divide in this code. Is it really needed?
+ */ + tri.oneoverarea = 1.0f / area; + + /* Setup parameter interpolants: + */ + setup_tri_coefficients( llvmpipe, &tri, v1, v2, v3, frontfacing ); + + for (i = 0; i < Elements(tri.quad); i++) { + tri.quad[i].coef = tri.coef; + tri.quad[i].posCoef = &tri.position_coef; + } + + /* half-edge constants, will be iterated over the whole + * rendertarget. + */ + c1 = tri.dy12 * x1 - tri.dx12 * y1; + c2 = tri.dy23 * x2 - tri.dx23 * y2; + c3 = tri.dy31 * x3 - tri.dx31 * y3; + + /* correct for top-left fill convention: + */ + if (tri.dy12 < 0 || (tri.dy12 == 0 && tri.dx12 > 0)) c1++; + if (tri.dy23 < 0 || (tri.dy23 == 0 && tri.dx23 > 0)) c2++; + if (tri.dy31 < 0 || (tri.dy31 == 0 && tri.dx31 > 0)) c3++; + + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + tri.eo1 = 0; + if (tri.dy12 < 0) tri.eo1 -= tri.dy12; + if (tri.dx12 > 0) tri.eo1 += tri.dx12; + + tri.eo2 = 0; + if (tri.dy23 < 0) tri.eo2 -= tri.dy23; + if (tri.dx23 > 0) tri.eo2 += tri.dx23; + + tri.eo3 = 0; + if (tri.dy31 < 0) tri.eo3 -= tri.dy31; + if (tri.dx31 > 0) tri.eo3 += tri.dx31; + + /* Calculate trivial accept offsets from the above.
+ */ + tri.ei1 = tri.dx12 - tri.dy12 - tri.eo1; + tri.ei2 = tri.dx23 - tri.dy23 - tri.eo2; + tri.ei3 = tri.dx31 - tri.dy31 - tri.eo3; + + minx &= ~(BLOCKSIZE-1); /* aligned blocks */ + miny &= ~(BLOCKSIZE-1); /* aligned blocks */ + + c1 += tri.dx12 * miny - tri.dy12 * minx; + c2 += tri.dx23 * miny - tri.dy23 * minx; + c3 += tri.dx31 * miny - tri.dy31 * minx; + + if ((miny & ~15) == (maxy & ~15) && + (minx & ~15) == (maxx & ~15)) + { + const int step = 2; + + float xstep1 = -step * tri.dy12; + float xstep2 = -step * tri.dy23; + float xstep3 = -step * tri.dy31; + + float ystep1 = step * tri.dx12; + float ystep2 = step * tri.dx23; + float ystep3 = step * tri.dx31; + + float eo1 = tri.eo1 * step; + float eo2 = tri.eo2 * step; + float eo3 = tri.eo3 * step; + + int x, y; + + /* Subdivide space into NxM blocks, where each block is square and + * power-of-four in dimension. + * + * Trivially accept or reject blocks, else jump to per-pixel + * examination above. + */ + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else + { + do_quad(&tri, x, y, cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } + else + { + const int step = BLOCKSIZE; + + float ei1 = tri.ei1 * step; + float ei2 = tri.ei2 * step; + float ei3 = tri.ei3 * step; + + float eo1 = tri.eo1 * step; + float eo2 = tri.eo2 * step; + float eo3 = tri.eo3 * step; + + float xstep1 = -step * tri.dy12; + float xstep2 = -step * tri.dy23; + float xstep3 = -step * tri.dy31; + + float ystep1 = step * tri.dx12; + float ystep2 = step * tri.dx23; + float ystep3 = step * tri.dx31; + int x, y; + + + /* Subdivide space into NxM blocks, where each block is square and + * power-of-four in dimension. 
+ * + * Trivially accept or reject blocks, else jump to per-pixel + * examination above. + */ + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + boolean in = false; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + /* do nothing */ + if (in) + break; + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + in = TRUE; + block_full(&tri, x, y); /* trivial accept */ + } + else + { + in = TRUE; + // block_full(&tri, x, y); /* trivial accept */ + do_block(&tri, x, y, cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } +} + +static void triangle_cw( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + do_triangle_ccw( llvmpipe, v1, v0, v2, !llvmpipe->ccw_is_frontface ); +} + +static void triangle_ccw( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + do_triangle_ccw( llvmpipe, v0, v1, v2, llvmpipe->ccw_is_frontface ); +} + +static void triangle_both( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + if (ex * fy - ey * fx < 0) + triangle_ccw( llvmpipe, v0, v1, v2 ); + else + triangle_cw( llvmpipe, v0, v1, v2 ); +} + +static void triangle_nop( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ +} + +/** + * Do setup for triangle rasterization, then render the triangle. 
+ */ +void setup_prepare_tri( struct llvmpipe_context *llvmpipe ) +{ + llvmpipe->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == + PIPE_WINDING_CW); + + switch (llvmpipe->rasterizer->cull_mode) { + case PIPE_WINDING_NONE: + llvmpipe->triangle = triangle_both; + break; + case PIPE_WINDING_CCW: + llvmpipe->triangle = triangle_cw; + break; + case PIPE_WINDING_CW: + llvmpipe->triangle = triangle_ccw; + break; + default: + llvmpipe->triangle = triangle_nop; + break; + } +} + + diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 30fb41ea65d..31eaadda216 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -67,24 +67,19 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) const struct lp_fragment_shader *lpfs = llvmpipe->fs; const enum interp_mode colorInterp = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(llvmpipe->draw); uint i; - if (llvmpipe->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); - uint i; - - /* No longer any need to try and emit draw vertex_header info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + /* Tell draw_vbuf to simply emit the whole post-xform vertex + * as-is. No longer any need to try and emit draw vertex_header + * info. 
+ */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } + draw_compute_vertex_size(vinfo_vbuf); /* * Loop over fragment shader inputs, searching for the matching output diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c deleted file mode 100644 index ec3e002d628..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ /dev/null @@ -1,353 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture tile caching. 
- * - * Author: - * Brian Paul - */ - -#include "pipe/p_inlines.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_tile.h" -#include "util/u_rect.h" -#include "lp_context.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tile_soa.h" -#include "lp_tile_cache.h" - - -#define MAX_WIDTH 4096 -#define MAX_HEIGHT 4096 - - -enum llvmpipe_tile_status -{ - LP_TILE_STATUS_UNDEFINED = 0, - LP_TILE_STATUS_CLEAR = 1, - LP_TILE_STATUS_DEFINED = 2 -}; - - -struct llvmpipe_cached_tile -{ - enum llvmpipe_tile_status status; - - /** color in SOA format */ - uint8_t *color; -}; - - -struct llvmpipe_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; - - uint8_t clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ - - struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ -}; - - -struct llvmpipe_tile_cache * -lp_create_tile_cache( struct pipe_screen *screen ) -{ - struct llvmpipe_tile_cache *tc; - int maxLevels, maxTexSize; - - /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ - maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); - maxTexSize = 1 << (maxLevels - 1); - assert(MAX_WIDTH >= maxTexSize); - - tc = CALLOC_STRUCT( llvmpipe_tile_cache ); - if(!tc) - return NULL; - - tc->screen = screen; - - return tc; -} - - -void -lp_destroy_tile_cache(struct llvmpipe_tile_cache *tc) -{ - struct pipe_screen *screen; - unsigned x, y; - - for (y = 0; y < MAX_HEIGHT; y += TILE_SIZE) { - for (x = 0; x < MAX_WIDTH; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - if(tile->color) - align_free(tile->color); - } - } - - if (tc->transfer) { - screen = tc->transfer->texture->screen; - 
screen->tex_transfer_destroy(tc->transfer); - } - - FREE( tc ); -} - - -/** - * Specify the surface to cache. - */ -void -lp_tile_cache_set_surface(struct llvmpipe_tile_cache *tc, - struct pipe_surface *ps) -{ - if (tc->transfer) { - struct pipe_screen *screen = tc->transfer->texture->screen; - - if (ps == tc->surface) - return; - - if (tc->transfer_map) { - screen->transfer_unmap(screen, tc->transfer); - tc->transfer_map = NULL; - } - - screen->tex_transfer_destroy(tc->transfer); - tc->transfer = NULL; - } - - tc->surface = ps; - - if (ps) { - struct pipe_screen *screen = ps->texture->screen; - unsigned x, y; - - tc->transfer = screen->get_tex_transfer(screen, ps->texture, ps->face, - ps->level, ps->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, ps->width, ps->height); - - for (y = 0; y < ps->height; y += TILE_SIZE) { - for (x = 0; x < ps->width; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - tile->status = LP_TILE_STATUS_UNDEFINED; - - if(!tile->color) - tile->color = align_malloc( TILE_SIZE*TILE_SIZE*NUM_CHANNELS, 16 ); - } - } - } -} - - -/** - * Return the transfer being cached. - */ -struct pipe_surface * -lp_tile_cache_get_surface(struct llvmpipe_tile_cache *tc) -{ - return tc->surface; -} - - -void -lp_tile_cache_map_transfers(struct llvmpipe_tile_cache *tc) -{ - if (tc->transfer && !tc->transfer_map) - tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); -} - - -void -lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc) -{ - if (tc->transfer_map) { - tc->screen->transfer_unmap(tc->screen, tc->transfer); - tc->transfer_map = NULL; - } -} - - -/** - * Set a tile to a solid color. 
- */ -static void -clear_tile(struct llvmpipe_cached_tile *tile, - uint8_t clear_color[4]) -{ - if (clear_color[0] == clear_color[1] && - clear_color[1] == clear_color[2] && - clear_color[2] == clear_color[3]) { - memset(tile->color, clear_color[0], TILE_SIZE * TILE_SIZE * 4); - } - else { - uint x, y, chan; - for (y = 0; y < TILE_SIZE; y++) - for (x = 0; x < TILE_SIZE; x++) - for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(tile->color, x, y, chan) = clear_color[chan]; - } -} - - -/** - * Flush the tile cache: write all dirty tiles back to the transfer. - * any tiles "flagged" as cleared will be "really" cleared. - */ -void -lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) -{ - struct pipe_transfer *pt = tc->transfer; - unsigned x, y; - - if(!pt) - return; - - assert(tc->transfer_map); - - /* push the tile to all positions marked as clear */ - for (y = 0; y < pt->height; y += TILE_SIZE) { - for (x = 0; x < pt->width; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - if(tile->status != LP_TILE_STATUS_UNDEFINED) { - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - if (!pipe_clip_tile(x, y, &w, &h, pt)) { - switch(tile->status) { - case LP_TILE_STATUS_CLEAR: - /* Actually clear the tiles which were flagged as being in a - * clear state. */ - util_fill_rect(tc->transfer_map, &pt->block, pt->stride, - x, y, w, h, - tc->clear_val); - break; - - case LP_TILE_STATUS_DEFINED: - lp_tile_write_4ub(pt->format, - tile->color, - tc->transfer_map, pt->stride, - x, y, w, h); - break; - - default: - assert(0); - break; - } - } - - tile->status = LP_TILE_STATUS_UNDEFINED; - } - } - } -} - - -/** - * Get a tile from the cache. 
- * \param x, y position of tile, in pixels - */ -void * -lp_get_cached_tile(struct llvmpipe_tile_cache *tc, - unsigned x, unsigned y ) -{ - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - struct pipe_transfer *pt = tc->transfer; - - assert(tc->surface); - assert(tc->transfer); - - switch(tile->status) { - case LP_TILE_STATUS_CLEAR: - /* don't get tile from framebuffer, just clear it */ - clear_tile(tile, tc->clear_color); - tile->status = LP_TILE_STATUS_DEFINED; - break; - - case LP_TILE_STATUS_UNDEFINED: { - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - x &= ~(TILE_SIZE - 1); - y &= ~(TILE_SIZE - 1); - - if (!pipe_clip_tile(x, y, &w, &h, tc->transfer)) - lp_tile_read_4ub(pt->format, - tile->color, - tc->transfer_map, tc->transfer->stride, - x, y, w, h); - - tile->status = LP_TILE_STATUS_DEFINED; - break; - } - - case LP_TILE_STATUS_DEFINED: - /* nothing to do */ - break; - } - - return tile->color; -} - - -/** - * When a whole surface is being cleared to a value we can avoid - * fetching tiles above. - * Save the color and set a 'clearflag' for each tile of the screen. 
- */ -void -lp_tile_cache_clear(struct llvmpipe_tile_cache *tc, const float *rgba, - uint clearValue) -{ - struct pipe_transfer *pt = tc->transfer; - const unsigned w = pt->width; - const unsigned h = pt->height; - unsigned x, y, chan; - - for(chan = 0; chan < 4; ++chan) - tc->clear_color[chan] = float_to_ubyte(rgba[chan]); - - tc->clear_val = clearValue; - - /* push the tile to all positions marked as clear */ - for (y = 0; y < h; y += TILE_SIZE) { - for (x = 0; x < w; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - tile->status = LP_TILE_STATUS_CLEAR; - } - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h deleted file mode 100644 index 161bab37991..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h +++ /dev/null @@ -1,71 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef LP_TILE_CACHE_H -#define LP_TILE_CACHE_H - - -#include "pipe/p_compiler.h" -#include "lp_tile_soa.h" - - -struct llvmpipe_tile_cache; /* opaque */ - - -extern struct llvmpipe_tile_cache * -lp_create_tile_cache( struct pipe_screen *screen ); - -extern void -lp_destroy_tile_cache(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_set_surface(struct llvmpipe_tile_cache *tc, - struct pipe_surface *lps); - -extern struct pipe_surface * -lp_tile_cache_get_surface(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_map_transfers(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc); - -extern void -lp_flush_tile_cache(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_clear(struct llvmpipe_tile_cache *tc, const float *rgba, - uint clearValue); - -extern void * -lp_get_cached_tile(struct llvmpipe_tile_cache *tc, - unsigned x, unsigned y ); - - -#endif /* LP_TILE_CACHE_H */ - -- cgit v1.2.3 From e529170c11d3cb5812aabeff0a6ee2d7a2ea66f2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 11:47:33 +0100 Subject: llvmpipe: more wipping --- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 2 +- src/gallium/drivers/llvmpipe/lp_rast.c | 119 ++++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 129 ++++++++ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 31 ++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 348 ++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rasterizer.c | 157 ---------- src/gallium/drivers/llvmpipe/lp_rasterizer.h | 112 ------- src/gallium/drivers/llvmpipe/lp_setup.c | 17 ++ 
src/gallium/drivers/llvmpipe/lp_setup_rasterize.c | 19 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 6 +- src/gallium/drivers/llvmpipe/lp_state_derived.c | 27 ++ 11 files changed, 691 insertions(+), 276 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_rast.c create mode 100644 src/gallium/drivers/llvmpipe/lp_rast.h create mode 100644 src/gallium/drivers/llvmpipe/lp_rast_priv.h create mode 100644 src/gallium/drivers/llvmpipe/lp_rast_tri.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_rasterizer.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_rasterizer.h (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index e244ac9087c..8cccb2905b7 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -138,7 +138,7 @@ lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); struct setup_context *setup_ctx = cvbr->setup; - llvmpipe_setup_prepare( setup_ctx ); + llvmpipe_update_state( setup_ctx->llvmpipe ); cvbr->llvmpipe->reduced_prim = u_reduced_prim(prim); cvbr->prim = prim; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c new file mode 100644 index 00000000000..4771f821b3c --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -0,0 +1,119 @@ + +struct lp_rasterizer *lp_rast_create( void ) +{ + return CALLOC_STRUCT(lp_rasterizer); +} + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil) +{ + pipe_surface_reference(&rast->state.color, color); + pipe_surface_reference(&rast->state.depth, depth); + rast->state.clear_color = util_pack_8888(clear_color); + rast->state.clear_depth = clear_depth * 0xffffffff; + rast->state.clear_stencil = clear_stencil; +} + +/* Begining of each 
tile: + */ +void lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ) +{ + rast->x = x; + rast->y = y; +} + +void lp_rast_clear_color( struct lp_rasterizer *rast ) +{ + const unsigned clear_color = rast->state.clear_color; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_color; +} + +void lp_rast_clear_depth( struct lp_rasterizer *rast ) +{ + const unsigned clear_depth = rast->state.clear_depth; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_depth; +} + +void lp_rast_clear_stencil( struct lp_rasterizer *rast ) +{ + const unsigned clear_stencil = rast->state.clear_stencil; + + memset(rast->tile.stencil, clear_stencil, sizeof rast->tile.stencil ); +} + +void lp_rast_load_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load colors from surface */ +} + +void lp_rast_load_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load depth (and stencil?) 
from surface */ +} + +/* Within a tile: + */ +void lp_rast_set_state( struct lp_rasterizer *rast, + const struct lp_rast_state *state ) +{ + rast->shader_state = state; + lp->quad.first->begin( lp->quad.first ); + +} + + +void lp_rast_shade_tile( struct lp_rasterizer *rast, + const struct lp_rast_shader_inputs *inputs ) +{ + /* Set up the silly quad coef pointers + */ + for (i = 0; i < 4; i++) { + rast->quads[i].posCoef = &inputs->posCoef; + rast->quads[i].coef = inputs->coef; + } + + /* Use the existing preference for 8x2 (four quads) shading: + */ + for (i = 0; i < TILESIZE; i += 8) { + for (j = 0; j < TILESIZE; j += 2) { + rast->shader_state.shade( inputs->jc, + rast->x + i, + rast->y + j, + rast->quads, 4 ); + } + } +} + +/* End of tile: + */ +void lp_rast_store_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store colors to surface */ +} + +void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store depth/stencil to surface */ +} + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer *rast ) +{ + FREE(rast); +} + diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h new file mode 100644 index 00000000000..8f4bd52c9e5 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -0,0 +1,129 @@ + +#ifndef LP_RAST_H +#define LP_RAST_H + +/* Initially create and program a single rasterizer directly. Later + * will want multiple of these, one or two per core. At that stage + * will probably pass command buffers into the rasterizers rather than + * individual function calls like this. + */ +struct lp_rasterizer; + +struct lp_rast_state { + /* State for the shader: + */ + struct lp_jit_context jc; + + /* The shader itself. 
Probably we also need to pass a pointer to + * the tile color/z/stencil data somehow: + */ + void (*run)( struct lp_jit_context *jc, + struct quad_header **quads, + unsigned nr ); +}; + +/* Coefficients necessary to run the shader at a given location: + */ +struct lp_rast_shader_inputs { + + /* Current rasterizer state: + */ + const struct lp_rast_state *state; + + /* Attribute interpolation: + */ + struct tgsi_interp_coef position_coef; + struct tgsi_interp_coef *coef; +}; + + +/* Rasterization information for a triangle known to be in this bin, + * plus inputs to run the shader: + */ +struct lp_rast_triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + /* State to run the shader: */ + struct lp_rast_shader_inputs inputs; +}; + + + +struct lp_rasterizer *lp_rast_create( void ); + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil); + +/* Begining of each tile: + */ +void lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ); + + + +union lp_rast_cmd_arg { + const struct lp_rast_shader_inputs *shade_tile; + const struct lp_rast_triangle *triangle; + const struct lp_rast_state *set_state; +}; + + +/* Binnable Commands: + */ +void lp_rast_clear_color( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_clear_zstencil( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_load_color( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_load_zstencil( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_set_state( 
struct lp_rasterizer *, + const union lp_rast_cmd_arg * ); + +void lp_rast_triangle( struct lp_rasterizer *, + const union lp_rast_cmd_arg * ); + +void lp_rast_shade_tile( struct lp_rasterizer *, + const union lp_rast_cmd_arg * ); + +void lp_rast_store_color( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + +void lp_rast_store_zstencil( struct lp_rasterizer *, + const union lp_rast_cmd_arg *); + + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer * ); + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h new file mode 100644 index 00000000000..538ec225511 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -0,0 +1,31 @@ +#ifndef LP_RAST_PRIV_H +#define LP_RAST_PRIV_H + +#include "lp_rast.h" + +struct lp_rasterizer { + + /* We can choose whatever layout for the internal tile storage we + * prefer: + */ + struct { + unsigned color[TILESIZE][TILESIZE]; + unsigned depth[TILESIZE][TILESIZE]; + char stencil[TILESIZE][TILESIZE]; + } tile; + + + unsigned x; + unsigned y; + + + struct { + struct pipe_surface *color; + struct pipe_surface *zstencil; + unsigned clear_color; + unsigned clear_depth; + char clear_stencil; + } state; +}; + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c new file mode 100644 index 00000000000..4b7b3719de2 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -0,0 +1,348 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Rasterization for binned triangles within a tile + */ + +#include "lp_context.h" +#include "lp_quad.h" +#include "lp_quad_pipe.h" +#include "lp_setup.h" +#include "lp_state.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#define BLOCKSIZE 4 + + +/* Convert 8x8 block into four runs of quads and render each in turn. 
+ */ +#if (BLOCKSIZE == 8) +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int i; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + tri->quad[2].input.x0 = x + 4; + tri->quad[3].input.x0 = x + 6; + + for (i = 0; i < 4; i++, y += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + tri->quad[2].inout.mask = 0xf; + tri->quad[3].inout.mask = 0xf; + + tri->quad[0].input.y0 = y; + tri->quad[1].input.y0 = y; + tri->quad[2].input.y0 = y; + tri->quad[3].input.y0 = y; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + ptrs[2] = &tri->quad[2]; + ptrs[3] = &tri->quad[3]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); + } +} +#else +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int iy; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + + for (iy = 0; iy < 4; iy += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + + tri->quad[0].input.y0 = y + iy; + tri->quad[1].input.y0 = y + iy; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); + } +} +#endif + +static void +do_quad( struct lp_rasterizer *rast, + int x, int y, + float c1, float c2, float c3 ) +{ + struct triangle *tri = rast->tri; + struct quad_header *quad = &rast->quad[0]; + + float xstep1 = -tri->dy12; + float xstep2 = -tri->dy23; + float xstep3 = -tri->dy31; + + float ystep1 = tri->dx12; + float ystep2 = tri->dx23; + float ystep3 = tri->dx31; + + quad->input.x0 = x; + quad->input.y0 = y; + quad->inout.mask = 0; + + if (c1 > 0 && + c2 > 0 && + c3 > 0) + quad->inout.mask |= 1; + + if (c1 + xstep1 > 0 && + c2 + xstep2 > 0 && + c3 + xstep3 > 0) + quad->inout.mask |= 2; + + if (c1 + ystep1 > 0 && + c2 + ystep2 > 0 && + c3 + ystep3 > 0) + 
quad->inout.mask |= 4; + + if (c1 + ystep1 + xstep1 > 0 && + c2 + ystep2 + xstep2 > 0 && + c3 + ystep3 + xstep3 > 0) + quad->inout.mask |= 8; + + if (quad->inout.mask) + rast->state->run( rast->state->state, &quad, 1 ); +} + +/* Evaluate each pixel in a block, generate a mask and possibly render + * the quad: + */ +static void +do_block( struct triangle *tri, + int x, int y, + float c1, + float c2, + float c3 ) +{ + const int step = 2; + + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; + + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + + int ix, iy; + + for (iy = 0; iy < BLOCKSIZE; iy += 2) { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (ix = 0; ix < BLOCKSIZE; ix += 2) { + + do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); + + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } +} + + + +/* Scan the tile in chunks and figure out which pixels to rasterize + * for this triangle: + */ +void lp_rast_triangle( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri ) +{ + int minx, maxx, miny, maxy; + + /* Clamp to tile dimensions: + */ + minx = MAX2(tri->maxx, rast->x); + miny = MAX2(tri->miny, rast->y); + maxx = MIN2(tri->maxx, rast->x + TILESIZE); + maxy = MIN2(tri->maxy, rast->y + TILESIZE); + + if (miny == maxy || + minx == maxx) { + debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); + //assert(0); + return; + } + + /* Bind parameter interpolants: + */ + for (i = 0; i < Elements(rast->quad); i++) { + rast->quad[i].coef = tri->coef; + rast->quad[i].posCoef = &tri->position_coef; + } + + /* Small area? 
+ */ + if (miny + 16 > maxy && + minx + 16 > maxx) + { + const int step = 2; + + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; + + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + + float eo1 = tri->eo1 * step; + float eo2 = tri->eo2 * step; + float eo3 = tri->eo3 * step; + + int x, y; + + minx &= ~(step-1); + maxx &= ~(step-1); + + /* Subdivide space into NxM blocks, where each block is square and + * power-of-four in dimension. + * + * Trivially accept or reject blocks, else jump to per-pixel + * examination above. + */ + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else + { + do_quad(&tri, x, y, cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } + else + { + const int step = BLOCKSIZE; + + float ei1 = tri->ei1 * step; + float ei2 = tri->ei2 * step; + float ei3 = tri->ei3 * step; + + float eo1 = tri->eo1 * step; + float eo2 = tri->eo2 * step; + float eo3 = tri->eo3 * step; + + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; + + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + int x, y; + + minx &= ~(step-1); + miny &= ~(step-1); + + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + block_full(&tri, x, y); /* trivial accept */ + } + else + { + do_block(&tri, x, y, cx1, cx2, cx3); + } + + 
/* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } +} + diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_rasterizer.c deleted file mode 100644 index 089ea597292..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_rasterizer.c +++ /dev/null @@ -1,157 +0,0 @@ - -struct lp_rasterizer { - - /* We can choose whatever layout for the internal tile storage we - * prefer: - */ - struct { - unsigned color[TILESIZE][TILESIZE]; - unsigned depth[TILESIZE][TILESIZE]; - char stencil[TILESIZE][TILESIZE]; - } tile; - - - unsigned x; - unsigned y; - - - struct { - struct pipe_surface *color; - struct pipe_surface *zstencil; - unsigned clear_color; - unsigned clear_depth; - char clear_stencil; - } state; -}; - -struct lp_rasterizer *lp_rast_create( void ) -{ - return CALLOC_STRUCT(lp_rasterizer); -} - -void lp_rast_bind_surfaces( struct lp_rasterizer *, - struct pipe_surface *color, - struct pipe_surface *zstencil, - const float *clear_color, - double clear_depth, - unsigned clear_stencil) -{ - pipe_surface_reference(&rast->state.color, color); - pipe_surface_reference(&rast->state.depth, depth); - rast->state.clear_color = util_pack_8888(clear_color); - rast->state.clear_depth = clear_depth * 0xffffffff; - rast->state.clear_stencil = clear_stencil; -} - -/* Begining of each tile: - */ -void lp_rast_start_tile( struct lp_rasterizer *, - unsigned x, - unsigned y ) -{ - rast->x = x; - rast->y = y; -} - -void lp_rast_clear_color( struct lp_rasterizer *rast ) -{ - const unsigned clear_color = rast->state.clear_color; - unsigned i, j; - - for (i = 0; i < TILESIZE; i++) - for (j = 0; j < TILESIZE; j++) - rast->tile[i][j] = clear_color; -} - -void lp_rast_clear_depth( struct lp_rasterizer *rast ) -{ - const unsigned clear_depth = rast->state.clear_depth; - unsigned i, j; - - for (i = 0; i < TILESIZE; 
i++) - for (j = 0; j < TILESIZE; j++) - rast->tile[i][j] = clear_depth; -} - -void lp_rast_clear_stencil( struct lp_rasterizer *rast ) -{ - const unsigned clear_stencil = rast->state.clear_stencil; - - memset(rast->tile.stencil, clear_stencil, sizeof rast->tile.stencil ); -} - -void lp_rast_load_color( struct lp_rasterizer *rast ) -{ - /* call u_tile func to load colors from surface */ -} - -void lp_rast_load_zstencil( struct lp_rasterizer *rast ) -{ - /* call u_tile func to load depth (and stencil?) from surface */ -} - -/* Within a tile: - */ -void lp_rast_set_state( struct lp_rasterizer *rast, - const struct lp_rast_state *state ) -{ - rast->shader_state = state; -} - -void lp_rast_triangle( struct lp_rasterizer *rast, - const struct lp_rast_triangle *inputs ) -{ - /* Set up the silly quad coef pointers - */ - for (i = 0; i < 4; i++) { - rast->quads[i].posCoef = inputs->posCoef; - rast->quads[i].coef = inputs->coef; - } - - /* Scan the tile in 4x4 chunks (?) and figure out which bits to - * rasterize: - */ - -} - -void lp_rast_shade_tile( struct lp_rasterizer *rast, - const struct lp_rast_shader_inputs *inputs ) -{ - /* Set up the silly quad coef pointers - */ - for (i = 0; i < 4; i++) { - rast->quads[i].posCoef = inputs->posCoef; - rast->quads[i].coef = inputs->coef; - } - - /* Use the existing preference for 8x2 (four quads) shading: - */ - for (i = 0; i < TILESIZE; i += 8) { - for (j = 0; j < TILESIZE; j += 2) { - rast->shader_state.shade( inputs->jc, - rast->x + i, - rast->y + j, - rast->quads, 4 ); - } - } -} - -/* End of tile: - */ -void lp_rast_store_color( struct lp_rasterizer *rast ) -{ - /* call u_tile func to store colors to surface */ -} - -void lp_rast_store_zstencil( struct lp_rasterizer *rast ) -{ - /* call u_tile func to store depth/stencil to surface */ -} - -/* Shutdown: - */ -void lp_rast_destroy( struct lp_rasterizer *rast ) -{ - FREE(rast); -} - diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.h 
b/src/gallium/drivers/llvmpipe/lp_rasterizer.h deleted file mode 100644 index b3ae06a1169..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_rasterizer.h +++ /dev/null @@ -1,112 +0,0 @@ - -/* Initially create and program a single rasterizer directly. Later - * will want multiple of these, one or two per core. At that stage - * will probably pass command buffers into the rasterizers rather than - * individual function calls like this. - */ -struct lp_rasterizer; - -struct lp_rast_state { - /* State: - */ - struct lp_jit_context jc; - - /* Shader itself: - */ -}; - -/* Coefficients necessary to run the shader at a given location: - */ -struct lp_rast_shader_inputs { - - /* Current rasterizer state: - */ - const struct lp_rast_state *state; - - /* Attribute interpolation: - */ - float oneoverarea; - float x1; - float y1; - - struct tgsi_interp_coef position_coef; - struct tgsi_interp_coef *coef; -}; - - -/* Rasterization information for a triangle known to be in this bin, - * plus inputs to run the shader: - */ -struct lp_rast_triangle { - /* one-pixel sized trivial accept offsets for each plane */ - float ei1; - float ei2; - float ei3; - - /* one-pixel sized trivial reject offsets for each plane */ - float eo1; - float eo2; - float eo3; - - /* y deltas for vertex pairs */ - float dy12; - float dy23; - float dy31; - - /* x deltas for vertex pairs */ - float dx12; - float dx23; - float dx31; - - /* State to run the shader: */ - struct lp_rast_shader_inputs inputs; -}; - - - -struct lp_rasterizer *lp_rast_create( void ); - -void lp_rast_bind_surfaces( struct lp_rasterizer *, - struct pipe_surface *color, - struct pipe_surface *zstencil, - const float *clear_color, - double clear_depth, - unsigned clear_stencil); - -/* Begining of each tile: - */ -void lp_rast_start_tile( struct lp_rasterizer *, - unsigned x, - unsigned y ); - -void lp_rast_clear_color( struct lp_rasterizer * ); - -void lp_rast_clear_zstencil( struct lp_rasterizer * ); - -void lp_rast_load_color( struct 
lp_rasterizer * ); - -void lp_rast_load_zstencil( struct lp_rasterizer * ); - - -/* Within a tile: - */ -void lp_rast_set_state( struct lp_rasterizer *, - const struct lp_rast_state * ); - -void lp_rast_triangle( struct lp_rasterizer *, - const struct lp_rast_triangle * ); - -void lp_rast_shade_tile( struct lp_rasterizer *, - const struct lp_rast_shader_inputs * ); - -/* End of tile: - */ -void lp_rast_store_color( struct lp_rasterizer * ); - -void lp_rast_store_zstencil( struct lp_rasterizer * ); - - -/* Shutdown: - */ -void lp_rast_destroy( struct lp_rasterizer * ); - diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 8c67524506e..d6e51888b91 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -48,6 +48,23 @@ #define DEBUG_VERTS 0 + +void +llvmpipe_setup_flush() +{ +} + +void +llvmpipe_setup_bind_framebuffer() +{ +} + +void +llvmpipe_setup_clear() +{ +} + + /* Stubs for lines & points for now: */ void diff --git a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c index 5b4faf489b8..bb7a4feb390 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c @@ -1,7 +1,20 @@ void -rasterize( struct llvmpipe_context *llvmpipe, - struct binned_scene *scene ) +lp_setup_rasterize( struct llvmpipe_context *llvmpipe, + struct binned_scene *scene ) { - + lp_rast_bind_surfaces( rast, scene->framebuffer ); + + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + + lp_rast_start_tile( rast, i * TILESIZE, j * TILESIZE ); + + for (block = scene->tile[i][j].first; block; block = block->next) { + for (k = 0; k < block->nr_cmds; k++) { + block->cmd[k].func( rast, block->cmd[k].arg ); + } + } + } + } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index a09e0fa643e..d43db7b123d 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -728,9 +728,9 @@ static void triangle_nop( struct llvmpipe_context *llvmpipe, { } -/** - * Do setup for triangle rasterization, then render the triangle. - */ + + + void setup_prepare_tri( struct llvmpipe_context *llvmpipe ) { llvmpipe->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 31eaadda216..fcd31136b75 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -232,6 +232,22 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; } +static void +update_culling() +{ + if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && + lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && + lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { + /* we'll do culling */ + setup->winding = lp->rasterizer->cull_mode; + } + else { + /* 'draw' will do culling */ + setup->winding = PIPE_WINDING_NONE; + } +} + + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ @@ -270,3 +286,14 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) llvmpipe->dirty = 0; } + + +void llvmpipe_prepare( ) +{ + struct llvmpipe_context *lp = setup->llvmpipe; + + if (lp->dirty) { + llvmpipe_update_derived(lp); + } + +} -- cgit v1.2.3 From 5e13dfe6181952f0f538a77b8a9f91c1d7601ceb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 12:15:12 +0100 Subject: llvmpipe: whip out the intra-tile code from lp_setup_tri.c The "setup" module handles building per-tile display lists. 
Intra-tile rendering is handled by lp_rast*.c --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 326 +++------------------------- 1 file changed, 33 insertions(+), 293 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index d43db7b123d..98c87d551f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -26,12 +26,10 @@ **************************************************************************/ /* - * Recursive rasterization for triangles + * Binning code for triangles */ #include "lp_context.h" -#include "lp_quad.h" -#include "lp_quad_pipe.h" #include "lp_setup.h" #include "lp_state.h" #include "draw/draw_context.h" @@ -42,43 +40,6 @@ #include "util/u_math.h" #include "util/u_memory.h" -#define BLOCKSIZE 4 - -struct triangle { - /* one-pixel sized trivial accept offsets for each plane */ - float ei1; - float ei2; - float ei3; - - /* one-pixel sized trivial reject offsets for each plane */ - float eo1; - float eo2; - float eo3; - - /* y deltas for vertex pairs */ - float dy12; - float dy23; - float dy31; - - /* x deltas for vertex pairs */ - float dx12; - float dx23; - float dx31; - - /* Attribute interpolation: - */ - float oneoverarea; - float x1; - float y1; - struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; - struct tgsi_interp_coef position_coef; - - /* A run of pre-initialized quads: - */ - struct llvmpipe_context *llvmpipe; - struct quad_header quad[4]; -}; - /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). @@ -267,163 +228,6 @@ static inline float subpixel_snap( float a ) } -/* Convert 8x8 block into four runs of quads and render each in turn. 
- */ -#if (BLOCKSIZE == 8) -static void block_full( struct triangle *tri, int x, int y ) -{ - struct quad_header *ptrs[4]; - int i; - - tri->quad[0].input.x0 = x + 0; - tri->quad[1].input.x0 = x + 2; - tri->quad[2].input.x0 = x + 4; - tri->quad[3].input.x0 = x + 6; - - for (i = 0; i < 4; i++, y += 2) { - tri->quad[0].inout.mask = 0xf; - tri->quad[1].inout.mask = 0xf; - tri->quad[2].inout.mask = 0xf; - tri->quad[3].inout.mask = 0xf; - - tri->quad[0].input.y0 = y; - tri->quad[1].input.y0 = y; - tri->quad[2].input.y0 = y; - tri->quad[3].input.y0 = y; - - /* XXX: don't bother with this ptrs business */ - ptrs[0] = &tri->quad[0]; - ptrs[1] = &tri->quad[1]; - ptrs[2] = &tri->quad[2]; - ptrs[3] = &tri->quad[3]; - - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); - } -} -#elif (BLOCKSIZE == 4) -static void block_full( struct triangle *tri, int x, int y ) -{ - struct quad_header *ptrs[4]; - int iy; - - tri->quad[0].input.x0 = x + 0; - tri->quad[1].input.x0 = x + 2; - - for (iy = 0; iy < 4; iy += 2) { - tri->quad[0].inout.mask = 0xf; - tri->quad[1].inout.mask = 0xf; - - tri->quad[0].input.y0 = y + iy; - tri->quad[1].input.y0 = y + iy; - - /* XXX: don't bother with this ptrs business */ - ptrs[0] = &tri->quad[0]; - ptrs[1] = &tri->quad[1]; - - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); - } -} -#else -static void block_full( struct triangle *tri, int x, int y ) -{ - struct quad_header *ptrs[4]; - int iy; - - tri->quad[0].input.x0 = x; - tri->quad[0].input.y0 = y; - tri->quad[0].inout.mask = 0xf; - - ptrs[0] = &tri->quad[0]; - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 1 ); -} -#endif - - -static void -do_quad( struct triangle *tri, - int x, int y, - float c1, float c2, float c3 ) -{ - struct quad_header *quad = &tri->quad[0]; - - float xstep1 = -tri->dy12; - float xstep2 = -tri->dy23; - float xstep3 = -tri->dy31; - - float ystep1 = tri->dx12; - float ystep2 = tri->dx23; - float ystep3 = tri->dx31; - - 
quad->input.x0 = x; - quad->input.y0 = y; - quad->inout.mask = 0; - - if (c1 > 0 && - c2 > 0 && - c3 > 0) - quad->inout.mask |= 1; - - if (c1 + xstep1 > 0 && - c2 + xstep2 > 0 && - c3 + xstep3 > 0) - quad->inout.mask |= 2; - - if (c1 + ystep1 > 0 && - c2 + ystep2 > 0 && - c3 + ystep3 > 0) - quad->inout.mask |= 4; - - if (c1 + ystep1 + xstep1 > 0 && - c2 + ystep2 + xstep2 > 0 && - c3 + ystep3 + xstep3 > 0) - quad->inout.mask |= 8; - - if (quad->inout.mask) - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, &quad, 1 ); -} - -/* Evaluate each pixel in a block, generate a mask and possibly render - * the quad: - */ -static void -do_block( struct triangle *tri, - int x, int y, - float c1, - float c2, - float c3 ) -{ - const int step = 2; - - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; - - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; - - int ix, iy; - - for (iy = 0; iy < BLOCKSIZE; iy += 2) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; - - for (ix = 0; ix < BLOCKSIZE; ix += 2) { - - do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); - - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } -} - @@ -441,14 +245,14 @@ do_block( struct triangle *tri, #define MAX3(a,b,c) MAX2(MAX2(a,b),c) static void -do_triangle_ccw(struct llvmpipe_context *llvmpipe, +do_triangle_ccw(struct lp_setup *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { - const int rt_width = llvmpipe->framebuffer.cbufs[0]->width; - const int rt_height = llvmpipe->framebuffer.cbufs[0]->height; + const int rt_width = setup->framebuffer.cbufs[0]->width; + const int rt_height = setup->framebuffer.cbufs[0]->height; const float y1 = subpixel_snap(v1[0][1]); const float y2 = subpixel_snap(v2[0][1]); @@ -458,15 +262,12 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, const float x2 = 
subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct triangle tri; + struct triangle *tri = allocate_triangle; float area; float c1, c2, c3; int i; int minx, maxx, miny, maxy; - tri.llvmpipe = llvmpipe; - - tri.dx12 = x1 - x2; tri.dx23 = x2 - x3; tri.dx31 = x3 - x1; @@ -505,12 +306,7 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, /* Setup parameter interpolants: */ - setup_tri_coefficients( llvmpipe, &tri, v1, v2, v3, frontfacing ); - - for (i = 0; i < Elements(tri.quad); i++) { - tri.quad[i].coef = tri.coef; - tri.quad[i].posCoef = &tri.position_coef; - } + setup_tri_coefficients( setup, &tri, v1, v2, v3, frontfacing ); /* half-edge constants, will be interated over the whole * rendertarget. @@ -548,73 +344,22 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, tri.ei2 = tri.dx23 - tri.dy23 - tri.eo2; tri.ei3 = tri.dx31 - tri.dy31 - tri.eo3; - minx &= ~(BLOCKSIZE-1); /* aligned blocks */ - miny &= ~(BLOCKSIZE-1); /* aligned blocks */ + minx &= ~(TILESIZE-1); /* aligned blocks */ + miny &= ~(TILESIZE-1); /* aligned blocks */ c1 += tri.dx12 * miny - tri.dy12 * minx; c2 += tri.dx23 * miny - tri.dy23 * minx; c3 += tri.dx31 * miny - tri.dy31 * minx; - if ((miny & ~15) == (maxy & ~15) && - (minx & ~15) == (maxx & ~15)) + if (miny + TILESIZE > maxy && + minx + TILESIZE > maxx) { - const int step = 2; - - float xstep1 = -step * tri.dy12; - float xstep2 = -step * tri.dy23; - float xstep3 = -step * tri.dy31; - - float ystep1 = step * tri.dx12; - float ystep2 = step * tri.dx23; - float ystep3 = step * tri.dx31; - - float eo1 = tri.eo1 * step; - float eo2 = tri.eo2 * step; - float eo3 = tri.eo3 * step; - - int x, y; - - /* Subdivide space into NxM blocks, where each block is square and - * power-of-four in dimension. - * - * Trivially accept or reject blocks, else jump to per-pixel - * examination above. 
+ /* Triangle is contained in a single tile: */ - for (y = miny; y < maxy; y += step) - { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; - - for (x = minx; x < maxx; x += step) - { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - } - else - { - do_quad(&tri, x, y, cx1, cx2, cx3); - } - - /* Iterate cx values across the region: - */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - /* Iterate c values down the region: - */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } } else { - const int step = BLOCKSIZE; + const int step = TILESIZE; float ei1 = tri.ei1 * step; float ei2 = tri.ei2 * step; @@ -645,7 +390,6 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, float cx1 = c1; float cx2 = c2; float cx3 = c3; - boolean in = false; for (x = minx; x < maxx; x += step) { @@ -654,21 +398,18 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, cx3 + eo3 < 0) { /* do nothing */ - if (in) - break; } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && cx3 + ei3 > 0) { - in = TRUE; - block_full(&tri, x, y); /* trivial accept */ + /* shade whole tile */ + bin_command(tile[x][y], lp_rast_shade_tile, &tri->inputs ); } else { - in = TRUE; - // block_full(&tri, x, y); /* trivial accept */ - do_block(&tri, x, y, cx1, cx2, cx3); + /* shade partial tile */ + bin_command(tile[x][y], lp_rast_triangle, &tri ); } /* Iterate cx values across the region: @@ -687,23 +428,23 @@ do_triangle_ccw(struct llvmpipe_context *llvmpipe, } } -static void triangle_cw( struct llvmpipe_context *llvmpipe, +static void triangle_cw( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { - do_triangle_ccw( llvmpipe, v1, v0, v2, !llvmpipe->ccw_is_frontface ); + do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ); } -static void triangle_ccw( struct llvmpipe_context *llvmpipe, +static void triangle_ccw( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { - do_triangle_ccw( llvmpipe, 
v0, v1, v2, llvmpipe->ccw_is_frontface ); + do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); } -static void triangle_both( struct llvmpipe_context *llvmpipe, +static void triangle_both( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -716,38 +457,37 @@ static void triangle_both( struct llvmpipe_context *llvmpipe, /* det = cross(e,f).z */ if (ex * fy - ey * fx < 0) - triangle_ccw( llvmpipe, v0, v1, v2 ); + triangle_ccw( setup, v0, v1, v2 ); else - triangle_cw( llvmpipe, v0, v1, v2 ); + triangle_cw( setup, v0, v1, v2 ); } -static void triangle_nop( struct llvmpipe_context *llvmpipe, +static void triangle_nop( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { } - - - -void setup_prepare_tri( struct llvmpipe_context *llvmpipe ) +void setup_prepare_tri( struct setup_context *setup ) { - llvmpipe->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == - PIPE_WINDING_CW); + struct llvmpipe_context *llvmpipe = setup->llvmpipe; + + setup->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == + PIPE_WINDING_CW); switch (llvmpipe->rasterizer->cull_mode) { case PIPE_WINDING_NONE: - llvmpipe->triangle = triangle_both; + setup->triangle = triangle_both; break; case PIPE_WINDING_CCW: - llvmpipe->triangle = triangle_cw; + setup->triangle = triangle_cw; break; case PIPE_WINDING_CW: - llvmpipe->triangle = triangle_ccw; + setup->triangle = triangle_ccw; break; default: - llvmpipe->triangle = triangle_nop; + setup->triangle = triangle_nop; break; } } -- cgit v1.2.3 From a6676d896ed18426ed3d7e6340347974c1694ca2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 12:44:30 +0100 Subject: llvmpipe: Add the rast -> jit shader glue. Ugly code. Will eventually be reduced to a very thin inlined function. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 58 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 5 +-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 18 ++++++--- 3 files changed, 73 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 4771f821b3c..58ef108123d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -98,6 +98,64 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, } } + +void lp_rast_shade_quads( const struct lp_rast_state *state, + struct lp_rast_tile *tile, + struct quad_header **quads, + unsigned nr ) +{ + struct lp_fragment_shader *fs = llvmpipe->fs; + struct quad_header *quad = quads[0]; + const unsigned x = quad->input.x0; + const unsigned y = quad->input.y0; + uint8_t *color; + uint8_t *depth; + uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; + unsigned chan_index; + unsigned q; + + /* Sanity checks */ + assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH); + assert(x % TILE_VECTOR_WIDTH == 0); + assert(y % TILE_VECTOR_HEIGHT == 0); + for (q = 0; q < nr; ++q) { + assert(quads[q]->input.x0 == x + q*2); + assert(quads[q]->input.y0 == y); + } + + /* mask */ + for (q = 0; q < 4; ++q) + for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) + mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? 
~0 : 0; + + /* color buffer */ + color = &TILE_PIXEL(tile->color, x, y, 0); + + /* depth buffer */ + assert((x % 2) == 0); + assert((y % 2) == 0); + depth = (uint8_t)*tile->depth + y*TILE_SIZE*4 + 2*x*4; + + /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ + assert(lp_check_alignment(mask, 16)); + + assert(lp_check_alignment(depth, 16)); + assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(state->jc.blend_color, 16)); + + /* run shader */ + state->jit_function( &state->jc, + x, y, + quad->coef->a0, + quad->coef->dadx, + quad->coef->dady, + &mask[0][0], + color, + depth); + +} + + /* End of tile: */ void lp_rast_store_color( struct lp_rasterizer *rast ) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 8f4bd52c9e5..e417be935b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -17,9 +17,8 @@ struct lp_rast_state { /* The shader itself. Probably we also need to pass a pointer to * the tile color/z/stencil data somehow: */ - void (*run)( struct lp_jit_context *jc, - struct quad_header **quads, - unsigned nr ); + lp_jit_frag_func shader; + }; /* Coefficients necessary to run the shader at a given location: diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 538ec225511..7eced38d672 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -3,16 +3,24 @@ #include "lp_rast.h" + +/* We can choose whatever layout for the internal tile storage we + * prefer: + */ +struct lp_rast_tile +{ + uint8_t *color; + + uint8_t *depth; +}; + + struct lp_rasterizer { /* We can choose whatever layout for the internal tile storage we * prefer: */ - struct { - unsigned color[TILESIZE][TILESIZE]; - unsigned depth[TILESIZE][TILESIZE]; - char stencil[TILESIZE][TILESIZE]; - } tile; + struct lp_rast_tile tile; unsigned x; -- cgit v1.2.3 From 
46df37ebfa83d7d06f4adebfbe201fed5bf2ecab Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 12:44:52 +0100 Subject: llvmpipe: Update SConscript. --- src/gallium/drivers/llvmpipe/SConscript | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 344b2463377..5e0fadc247c 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -46,8 +46,10 @@ llvmpipe = env.ConvenienceLibrary( 'lp_flush.c', 'lp_jit.c', 'lp_prim_vbuf.c', + 'lp_query.c', + 'lp_rast.c', + 'lp_rast_tri.c', 'lp_setup.c', - 'lp_query.c', 'lp_screen.c', 'lp_state_blend.c', 'lp_state_clip.c', -- cgit v1.2.3 From d614ced756f2cca64ec83b122da4cd028c08c0eb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 12:51:46 +0100 Subject: llvmpipe: Update includes and copyright headers. --- src/gallium/drivers/llvmpipe/lp_rast.c | 33 ++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 26 +++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_setup.c | 1 - 4 files changed, 61 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 58ef108123d..df48ccce81d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -1,3 +1,36 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" + +#include "lp_state.h" +#include "lp_quad.h" +#include "lp_rast.h" + struct lp_rasterizer *lp_rast_create( void ) { diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e417be935b0..dadde2e8635 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -1,3 +1,29 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #ifndef LP_RAST_H #define LP_RAST_H diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 4b7b3719de2..40965d5f659 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2009 VMware, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d6e51888b91..ac9bfad3f21 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -34,7 +34,6 @@ #include "lp_context.h" #include "lp_quad.h" -#include "lp_quad_pipe.h" #include "lp_setup.h" #include "lp_state.h" #include "draw/draw_context.h" -- cgit v1.2.3 From 931210424bc46b2c13919f0ac3e0ef781eff207e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 15:44:29 +0100 Subject: llvmpipe: wip me harder --- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 84 +++--- src/gallium/drivers/llvmpipe/lp_rast.h | 11 + src/gallium/drivers/llvmpipe/lp_setup.c | 343 ++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_setup.h | 44 ++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 187 ++++++------ src/gallium/drivers/llvmpipe/lp_setup_rasterize.c | 20 -- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 151 +++++----- 7 files changed, 536 insertions(+), 304 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_setup_rasterize.c (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index 8cccb2905b7..6c51d40a8f4 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ 
b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -171,14 +171,14 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - llvmpipe_setup_point( setup_ctx, + lp_setup_point( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -186,7 +186,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -194,12 +194,12 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } if (nr) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[nr-1], stride), get_vert(vertex_buffer, indices[0], stride) ); } @@ -208,7 +208,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLES: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -216,7 +216,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], 
stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -227,7 +227,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-1], stride), get_vert(vertex_buffer, indices[i-(i&1)], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -235,7 +235,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-2], stride), get_vert(vertex_buffer, indices[i-(i&1)-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -246,7 +246,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride) ); @@ -254,7 +254,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -265,11 +265,11 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUADS: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, 
get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -277,12 +277,12 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -293,11 +293,11 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUAD_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride)); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -305,11 +305,11 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -324,7 +324,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) * flatshade_first state makes no difference. 
*/ for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[0], stride) ); @@ -355,14 +355,14 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - llvmpipe_setup_point( setup_ctx, + lp_setup_point( setup_ctx, get_vert(vertex_buffer, i-0, stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -370,7 +370,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -378,12 +378,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } if (nr) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, nr-1, stride), get_vert(vertex_buffer, 0, stride) ); } @@ -392,7 +392,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLES: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-2, stride) ); @@ -400,7 +400,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, 
stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -411,7 +411,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i++) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-1, stride), get_vert(vertex_buffer, i-(i&1), stride), get_vert(vertex_buffer, i-2, stride) ); @@ -419,7 +419,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i++) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-2, stride), get_vert(vertex_buffer, i-(i&1)-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -430,7 +430,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride) ); @@ -438,7 +438,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -449,11 +449,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUADS: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -461,11 +461,11 @@ 
lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -476,11 +476,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUAD_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -488,11 +488,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -507,7 +507,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) * flatshade_first state makes no difference. 
*/ for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride) ); @@ -525,7 +525,7 @@ static void lp_vbuf_destroy(struct vbuf_render *vbr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - llvmpipe_setup_destroy_context(cvbr->setup); + lp_setup_destroy_context(cvbr->setup); FREE(cvbr); } @@ -556,7 +556,7 @@ lp_create_vbuf_backend(struct llvmpipe_context *lp) cvbr->llvmpipe = lp; - cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); + cvbr->setup = lp_setup_create_context(cvbr->llvmpipe); return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index dadde2e8635..33a6065b89c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -90,6 +90,17 @@ struct lp_rast_triangle { struct lp_rast_shader_inputs inputs; }; +struct clear_tile { + boolean do_color; + boolean do_depth_stencil; + unsigned rgba; + unsigned depth_stencil; +}; + +struct load_tile { + boolean do_color; + boolean do_depth_stencil; +}; struct lp_rasterizer *lp_rast_create( void ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ac9bfad3f21..514366b71f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -26,124 +26,337 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines) + * Tiling engine. * - * \author Keith Whitwell - * \author Brian Paul + * Builds per-tile display lists and executes them on calls to + * lp_setup_flush(). 
*/ -#include "lp_context.h" -#include "lp_quad.h" #include "lp_setup.h" -#include "lp_state.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +void lp_setup_new_cmd_block( struct cmd_block_list *list ) +{ + struct cmd_block *block = MALLOC_STRUCT(cmd_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->count = 0; +} -#define DEBUG_VERTS 0 +void lp_setup_new_data_block( struct data_block_list *list ) +{ + struct data_block *block = MALLOC_STRUCT(data_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->used = 0; +} +static void reset_context( struct setup_context *setup ) +{ + for (i = 0; i < setup->tiles_x; i++) { + for (j = 0; j < setup->tiles_y; j++) { + struct cmd_block_list *list = scene->tile[i][j]; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->first; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + list->first = list->tail; + } + } -void -llvmpipe_setup_flush() + { + struct data_block_list *list = &scene->data; + struct data_block *block, *tmp; + + for (block = list->first; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + list->first = list->tail; + } +} + + + + +/* Add a command to all active bins. 
+ */ +static void bin_everywhere( struct setup_context *setup, + bin_cmd cmd, + const union lp_rast_cmd_arg *arg ) { + unsigned i, j; + for (i = 0; i < setup->tiles_x; i++) + for (j = 0; j < setup->tiles_y; j++) + bin_cmd( setup, &setup->tile[i][j], cmd, arg ); } -void -llvmpipe_setup_bind_framebuffer() + +static void +rasterize_bins( struct setup_context *setup, + struct lp_rast *rast, + boolean write_depth ) { + lp_rast_bind_color( rast, + scene->fb.color, + TRUE ); /* WRITE */ + + lp_rast_bind_depth( rast, + scene->fb.depth, + write_depth ); /* WRITE */ + + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + + lp_rast_start_tile( rast, + i * TILESIZE, + j * TILESIZE ); + + for (block = scene->tile[i][j].first; block; block = block->next) { + for (k = 0; k < block->nr_cmds; k++) { + block->cmd[k].func( rast, block->cmd[k].arg ); + } + } + + lp_rast_finish_tile( rast ); + } + } + + lp_setup_free_data( setup ); } -void -llvmpipe_setup_clear() + + +static void +begin_binning( struct setup_context *setup ) { + if (setup->fb.color) { + if (setup->fb.clear_color) + bin_everywhere( setup, + lp_rast_clear_color, + &setup->clear_data ); + else + bin_everywhere( setup, + lp_rast_load_color, + NULL ); + } + + if (setup->fb.zstencil) { + if (setup->fb.clear_zstencil) + bin_everywhere( setup, + lp_rast_clear_zstencil, + &setup->clear_data ); + else + bin_everywhere( setup, + lp_rast_load_zstencil, + NULL ); + } } -/* Stubs for lines & points for now: +/* This basically bins and then flushes any outstanding full-screen + * clears. + * + * TODO: fast path for fullscreen clears and no triangles. 
*/ -void -llvmpipe_setup_point(struct setup_context *setup, - const float (*v0)[4]) +static void +execute_clears( struct setup_context *setup ) { + begin_binning( setup ); + rasterize_bins( setup ); } + +static void +set_state( struct setup_context *setup, + unsigned new_state ) +{ + unsigned old_state = setup->state; + + if (old_state == new_state) + return; + + switch (new_state) { + case SETUP_ACTIVE: + if (old_state == SETUP_FLUSHED) + setup_begin_binning( setup ); + break; + + case SETUP_CLEARED: + if (old_state == SETUP_ACTIVE) { + assert(0); + return; + } + break; + + case SETUP_FLUSHED: + if (old_state == SETUP_CLEAR) + execute_clears( setup ); + else + rasterize_bins( setup ); + break; + } + + setup->state = new_state; +} + + void -llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) +lp_setup_flush( struct setup_context *setup, + unsigned flags ) { + set_state( setup, SETUP_FLUSHED ); } -/* Called after statechange, before emitting primitives. If binning - * is active, this function should store relevant state in the binning - * context. - * - * That includes: - * - current fragment shader function - * - bound constant buffer contents - * - bound textures - * - blend color - * - etc. - * - * Basically everything needed at some point in the future to - * rasterize triangles for the current state. - * - * Additionally this will set up the state needed for the rasterizer - * to process and bin incoming triangles. That would include such - * things as: - * - cull mode - * - ??? - * - etc. 
- * - */ -void setup_prepare( struct setup_context *setup ) +void +lp_setup_bind_framebuffer( struct setup_context *setup, + struct pipe_surface *color, + struct pipe_surface *zstencil ) { - struct llvmpipe_context *lp = setup->llvmpipe; + unsigned width, height; - if (lp->dirty) { - llvmpipe_update_derived(lp); - } + set_state( setup, SETUP_FLUSHED ); + + pipe_surface_reference( &setup->fb.color, color ); + pipe_surface_reference( &setup->fb.zstencil, zstencil ); + + width = MAX2( color->width, zstencil->width ); + height = MAX2( color->height, zstencil->height ); + + setup->tiles_x = align( width, TILESIZE ) / TILESIZE; + setup->tiles_y = align( height, TILESIZE ) / TILESIZE; +} + +void +lp_setup_clear( struct setup_context *setup, + const float *clear_color, + double clear_depth, + unsigned clear_stencil, + unsigned flags ) +{ + if (setup->state == SETUP_ACTIVE) { + struct lp_rast_clear_info *clear_info; + unsigned i, j; + + clear_info = alloc_clear_info( setup ); - lp->quad.first->begin( lp->quad.first ); + if (flags & PIPE_CLEAR_COLOR) { + pack_color( setup, + clear_info->color, + clear_color ); + bin_everywhere(setup, lp_rast_clear_color, clear_info ); + } - if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && - lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && - lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { - /* we'll do culling */ - setup->winding = lp->rasterizer->cull_mode; + if (flags & PIPE_CLEAR_DEPTH_STENCIL) { + pack_depth_stencil( setup, + clear_info->depth, + clear_depth, + clear_stencil ); + + bin_everywhere(setup, lp_rast_clear_zstencil, clear_info ); + } } else { - /* 'draw' will do culling */ - setup->winding = PIPE_WINDING_NONE; + set_state( setup, SETUP_CLEARED ); + setup->clear.flags |= flags; + + if (flags & PIPE_CLEAR_COLOR) { + memcpy(setup->clear.color, color, sizeof setup->clear.color); + } + + if (flags & PIPE_CLEAR_DEPTH_STENCIL) { + setup->clear.depth = clear_depth; + setup->clear.stencil = clear_stencil; + } } +} + + +void 
+lp_setup_set_fs_inputs( struct setup_context *setup, + const enum lp_interp *interp, + unsigned nr ) +{ + memcpy( setup->interp, interp, nr * sizeof interp[0] ); +} - setup_prepare_tri( setup->llvmpipe ); + +static void +first_triangle( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + set_state( setup, STATE_ACTIVE ); + setup_choose_triangle( setup, v0, v1, v2 ); +} + + + +/* Stubs for lines & points for now: + */ +void +lp_setup_point(struct setup_context *setup, + const float (*v0)[4]) +{ + setup->point( setup, v0 ); } +void +lp_setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + setup->line( setup, v0, v1 ); +} + +void +lp_setup_triangle(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + setup->triangle( setup, v0, v1, v2 ); +} void setup_destroy_context( struct setup_context *setup ) { + lp_rast_destroy( setup->rast ); FREE( setup ); } /** - * Create a new primitive setup/render stage. + * Create a new primitive tiling engine. Currently also creates a + * rasterizer to use with it. 
*/ -struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ) +struct setup_context *setup_create_context( void ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); - unsigned i; - setup->llvmpipe = llvmpipe; + setup->rast = lp_rast_create( void ); + if (!setup->rast) + goto fail; + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) + setup->tile[i][j].first = + setup->tile[i][j].next = CALLOC_STRUCT(cmd_block); return setup; + +fail: + FREE(setup); + return NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 05aaaf83b8e..2542faad36b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -27,28 +27,46 @@ #ifndef LP_SETUP_H #define LP_SETUP_H + +enum lp_interp { + LP_INTERP_CONSTANT, + LP_INTERP_LINEAR, + LP_INTERP_PERSPECTIVE, + LP_INTERP_POSITION, + LP_INTERP_FACING +}; + struct setup_context; -struct llvmpipe_context; -/* Note, not using setup_context currently - */ +struct setup_context * +lp_setup_create( void ); void -llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]); +lp_setup_triangle(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v1)[4]); void -llvmpipe_setup_point( struct setup_context *setup, - const float (*v0)[4] ); - +lp_setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]); -struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ); +void +lp_setup_point( struct setup_context *setup, + const float (*v0)[4] ); -void setup_prepare( struct setup_context *setup ); +void +lp_setup_set_triangle_state( struct setup_context *setup, + unsigned cullmode, + boolean front_is_ccw ); -void setup_destroy_context( struct setup_context *setup ); +void +lp_setup_set_fs_inputs( struct setup_context *setup, + const enum lp_interp *interp, + unsigned nr ); -void 
setup_prepare_tri( struct llvmpipe_context *llvmpipe ); +void +lp_setup_destroy( struct setup_context *setup ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 848705e0991..91540d6751e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -27,114 +27,125 @@ #ifndef LP_SETUP_CONTEXT_H #define LP_SETUP_CONTEXT_H -struct clear_tile { - boolean do_color; - boolean do_depth_stencil; - unsigned rgba; - unsigned depth_stencil; -}; - -struct load_tile { - boolean do_color; - boolean do_depth_stencil; -}; -/* Shade tile points directly at this: - */ -struct shader_inputs { - /* Some way of updating rasterizer state: - */ - /* ??? */ - - /* Attribute interpolation: - */ - float oneoverarea; - float x1; - float y1; - - struct tgsi_interp_coef position_coef; - struct tgsi_interp_coef *coef; -}; +#define CMD_BLOCK_MAX 128 +#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) -/* Shade triangle points at this: +/* switch to a non-pointer value for this: */ -struct shade_triangle { - /* one-pixel sized trivial accept offsets for each plane */ - float ei1; - float ei2; - float ei3; - - /* one-pixel sized trivial reject offsets for each plane */ - float eo1; - float eo2; - float eo3; - - /* y deltas for vertex pairs */ - float dy12; - float dy23; - float dy31; - - /* x deltas for vertex pairs */ - float dx12; - float dx23; - float dx31; - - struct shader_inputs inputs; -}; - -struct bin_cmd { - enum { - CMD_END = 0, - CMD_CLEAR, - CMD_LOAD_TILE, - CMD_SHADE_TILE, - CMD_SHADE_TRIANGLE, - } cmd; - - union { - struct triangle *tri; - struct clear *clear; - } ptr; -}; +typedef void (*lp_rast_cmd)( struct lp_rast *, const union lp_rast_cmd_arg * ); struct cmd_block { - struct bin_cmd cmds[128]; + union lp_rast_arg *arg[CMD_BLOCK_MAX]; + lp_rast_cmd cmd[CMD_BLOCK_MAX]; unsigned count; struct cmd_block *next; }; -/* Triangles - */ 
struct data_block { - ubyte data[4096 - sizeof(unsigned) - sizeof(struct cmd_block *)]; - unsigned count; + ubyte data[DATA_BLOCK_SZ]; + unsigned used; struct data_block *next; }; -/* Need to store the state at the time the triangle was drawn, at - * least as it is needed during rasterization. That would include at - * minimum the constant values referred to by the fragment shader, - * blend state, etc. Much of this is code-generated into the shader - * in llvmpipe -- may be easier to do this work there. - */ -struct state_block { +struct cmd_block_list { + struct cmd_block *head; + struct cmd_block *tail; }; +struct data_block_list { + struct data_block *head; + struct data_block *tail; +}; + -/** - * Basically all the data from a binner scene: +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. */ -struct binned_scene { - struct llvmpipe_context *llvmpipe; +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 + +struct setup_context { + + /* When there are multiple threads, will want to double-buffer the + * bin arrays: + */ + struct cmd_block_list bin[MAXHEIGHT / TILESIZE][MAXWIDTH / TILESIZE]; + struct data_block_list data; + + unsigned tiles_x; + unsigned tiles_y; - struct cmd_block *bin[MAX_HEIGHT / BIN_SIZE][MAX_WIDTH / BIN_SIZE]; - struct data_block *data; + struct { + struct pipe_surface *color; + struct pipe_surface *zstencil; + } fb; + + struct { + unsigned flags; + float clear_color[4]; + double clear_depth; + unsigned clear_stencil; + } clear; + + enum { + SETUP_FLUSHED, + SETUP_CLEARED, + SETUP_ACTIVE + } state; + + struct { + enum lp_interp inputs[PIPE_MAX_ATTRIBS]; + unsigned nr_inputs; + } fs; + + void (*point)( struct setup_context *, + const float (*v0)[4]); + + void (*line)( struct setup_context *, + const float (*v0)[4], + const float (*v1)[4]); + + void (*triangle)( struct setup_context *, + const float (*v0)[4], + const float (*v1)[4], + const float (*v1)[4]); }; -static INLINE 
struct triangle *get_triangle( struct setup_context *setup ) +static INLINE void *get_data( struct data_block_list *list, + unsigned size) { - if (setup->triangles->count == TRIANGLE_BLOCK_COUNT) - return setup_triangle_from_new_block( setup ); - return &setup->triangles[setup->triangles->count++]; + if (list->tail->used + size > DATA_BLOCK_SIZE) { + lp_setup_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + char *data = tail->data + tail->used; + tail->used += size; + return data; + } +} + +/* Add a command to a given bin. + */ +static INLINE void bin_cmd( struct cmd_block_list *list, + bin_cmd cmd, + const union lp_rast_cmd_arg *arg ) +{ + if (list->tail.count == CMD_BLOCK_MAX) { + lp_setup_new_cmd_block( list ) + } + + { + struct cmd_block *tail = list->tail; + unsigned i = tail->count; + tail->cmd[i] = cmd; + tail->arg[i] = arg; + tail->count++; + } } + + + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c deleted file mode 100644 index bb7a4feb390..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c +++ /dev/null @@ -1,20 +0,0 @@ - -void -lp_setup_rasterize( struct llvmpipe_context *llvmpipe, - struct binned_scene *scene ) -{ - lp_rast_bind_surfaces( rast, scene->framebuffer ); - - for (i = 0; i < scene->tiles_x; i++) { - for (j = 0; j < scene->tiles_y; j++) { - - lp_rast_start_tile( rast, i * TILESIZE, j * TILESIZE ); - - for (block = scene->tile[i][j].first; block; block = block->next) { - for (k = 0; k < block->nr_cmds; k++) { - block->cmd[k].func( rast, block->cmd[k].arg ); - } - } - } - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 98c87d551f0..75a0ea88881 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -29,14 +29,8 @@ * Binning code for triangles */ -#include "lp_context.h" #include "lp_setup.h" #include "lp_state.h" 
-#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -163,56 +157,55 @@ setup_fragcoord_coef(struct triangle *tri, unsigned slot) /** * Compute the tri->coef[] array dadx, dady, a0 values. */ -static void setup_tri_coefficients( struct llvmpipe_context *llvmpipe, +static void setup_tri_coefficients( struct setup_context *setup, struct triangle *tri, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontface ) { - const struct lp_fragment_shader *fs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); + const struct vertex_info *vinfo = setup->vinfo; unsigned input; /* z and w are done by linear interpolation: */ - linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 2); - linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 3); + linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 2); + linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 3); /* setup interpolation for all the remaining attributes: */ - for (input = 0; input < fs->info.num_inputs; input++) { + for (input = 0; input < vinfo->num_fs_inputs; input++) { unsigned vert_attr = vinfo->attrib[input].src_index; unsigned i; switch (vinfo->attrib[input].interp_mode) { case INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(&tri->coef[input], v3, vert_attr, i); + constant_coef(tri->coef[input], v3, vert_attr, i); break; case INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + linear_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); break; case INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + perspective_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); break; case INTERP_POS: setup_fragcoord_coef(tri, input); break; - default: - 
assert(0); - } - - if (fs->info.input_semantic_name[input] == TGSI_SEMANTIC_FACE) { + case INTERP_FACING: tri->coef[input].a0[0] = 1.0f - frontface; tri->coef[input].dadx[0] = 0.0; tri->coef[input].dady[0] = 0.0; + break; + + default: + assert(0); } } } @@ -262,22 +255,22 @@ do_triangle_ccw(struct lp_setup *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct triangle *tri = allocate_triangle; + struct triangle *tri = allocate_triangle( setup ); float area; float c1, c2, c3; int i; int minx, maxx, miny, maxy; - tri.dx12 = x1 - x2; - tri.dx23 = x2 - x3; - tri.dx31 = x3 - x1; + tri->dx12 = x1 - x2; + tri->dx23 = x2 - x3; + tri->dx31 = x3 - x1; - tri.dy12 = y1 - y2; - tri.dy23 = y2 - y3; - tri.dy31 = y3 - y1; + tri->dy12 = y1 - y2; + tri->dy23 = y2 - y3; + tri->dy31 = y3 - y1; - area = (tri.dx12 * tri.dy31 - - tri.dx31 * tri.dy12); + area = (tri->dx12 * tri->dy31 - + tri->dx31 * tri->dy12); /* Cull non-ccw and zero-sized triangles. */ @@ -302,80 +295,87 @@ do_triangle_ccw(struct lp_setup *setup, /* The only divide in this code. Is it really needed? */ - tri.oneoverarea = 1.0f / area; + tri->oneoverarea = 1.0f / area; /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, &tri, v1, v2, v3, frontfacing ); + setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing ); /* half-edge constants, will be interated over the whole * rendertarget. 
*/ - c1 = tri.dy12 * x1 - tri.dx12 * y1; - c2 = tri.dy23 * x2 - tri.dx23 * y2; - c3 = tri.dy31 * x3 - tri.dx31 * y3; + c1 = tri->dy12 * x1 - tri->dx12 * y1; + c2 = tri->dy23 * x2 - tri->dx23 * y2; + c3 = tri->dy31 * x3 - tri->dx31 * y3; /* correct for top-left fill convention: */ - if (tri.dy12 < 0 || (tri.dy12 == 0 && tri.dx12 > 0)) c1++; - if (tri.dy23 < 0 || (tri.dy23 == 0 && tri.dx23 > 0)) c2++; - if (tri.dy31 < 0 || (tri.dy31 == 0 && tri.dx31 > 0)) c3++; + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) c1++; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) c2++; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) c3++; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ - tri.eo1 = 0; - if (tri.dy12 < 0) tri.eo1 -= tri.dy12; - if (tri.dx12 > 0) tri.eo1 += tri.dx12; + tri->eo1 = 0; + if (tri->dy12 < 0) tri->eo1 -= tri->dy12; + if (tri->dx12 > 0) tri->eo1 += tri->dx12; - tri.eo2 = 0; - if (tri.dy23 < 0) tri.eo2 -= tri.dy23; - if (tri.dx23 > 0) tri.eo2 += tri.dx23; + tri->eo2 = 0; + if (tri->dy23 < 0) tri->eo2 -= tri->dy23; + if (tri->dx23 > 0) tri->eo2 += tri->dx23; - tri.eo3 = 0; - if (tri.dy31 < 0) tri.eo3 -= tri.dy31; - if (tri.dx31 > 0) tri.eo3 += tri.dx31; + tri->eo3 = 0; + if (tri->dy31 < 0) tri->eo3 -= tri->dy31; + if (tri->dx31 > 0) tri->eo3 += tri->dx31; /* Calculate trivial accept offsets from the above. 
*/ - tri.ei1 = tri.dx12 - tri.dy12 - tri.eo1; - tri.ei2 = tri.dx23 - tri.dy23 - tri.eo2; - tri.ei3 = tri.dx31 - tri.dy31 - tri.eo3; + tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1; + tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; + tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; minx &= ~(TILESIZE-1); /* aligned blocks */ miny &= ~(TILESIZE-1); /* aligned blocks */ - c1 += tri.dx12 * miny - tri.dy12 * minx; - c2 += tri.dx23 * miny - tri.dy23 * minx; - c3 += tri.dx31 * miny - tri.dy31 * minx; + c1 += tri->dx12 * miny - tri->dy12 * minx; + c2 += tri->dx23 * miny - tri->dy23 * minx; + c3 += tri->dx31 * miny - tri->dy31 * minx; - if (miny + TILESIZE > maxy && - minx + TILESIZE > maxx) + /* Convert to tile coordinates: + */ + minx /= TILESIZE; + maxx /= TILESIZE; + miny /= TILESIZE; + maxy /= TILESIZE; + + if (miny == maxy && minx == maxx) { /* Triangle is contained in a single tile: */ + bin_command(setup->tile[minx][miny], lp_rast_triangle, tri ); } else { const int step = TILESIZE; - float ei1 = tri.ei1 * step; - float ei2 = tri.ei2 * step; - float ei3 = tri.ei3 * step; + float ei1 = tri->ei1 * step; + float ei2 = tri->ei2 * step; + float ei3 = tri->ei3 * step; - float eo1 = tri.eo1 * step; - float eo2 = tri.eo2 * step; - float eo3 = tri.eo3 * step; + float eo1 = tri->eo1 * step; + float eo2 = tri->eo2 * step; + float eo3 = tri->eo3 * step; - float xstep1 = -step * tri.dy12; - float xstep2 = -step * tri.dy23; - float xstep3 = -step * tri.dy31; + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; - float ystep1 = step * tri.dx12; - float ystep2 = step * tri.dx23; - float ystep3 = step * tri.dx31; + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; int x, y; @@ -385,13 +385,13 @@ do_triangle_ccw(struct lp_setup *setup, * Trivially accept or reject blocks, else jump to per-pixel * examination above. 
*/ - for (y = miny; y < maxy; y += step) + for (y = miny; y < maxy; y++) { float cx1 = c1; float cx2 = c2; float cx3 = c3; - for (x = minx; x < maxx; x += step) + for (x = minx; x < maxx; x++) { if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || @@ -404,12 +404,12 @@ do_triangle_ccw(struct lp_setup *setup, cx3 + ei3 > 0) { /* shade whole tile */ - bin_command(tile[x][y], lp_rast_shade_tile, &tri->inputs ); + bin_command(setup->tile[x][y], lp_rast_shade_tile, &tri->inputs ); } else { /* shade partial tile */ - bin_command(tile[x][y], lp_rast_triangle, &tri ); + bin_command(setup->tile[x][y], lp_rast_triangle, tri ); } /* Iterate cx values across the region: @@ -469,14 +469,13 @@ static void triangle_nop( struct setup_context *setup, { } -void setup_prepare_tri( struct setup_context *setup ) +void setup_set_tri_state( struct setup_context *setup, + unsigned cull_mode, + boolean ccw_is_frontface) { - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - - setup->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == - PIPE_WINDING_CW); + setup->ccw_is_frontface = ccw_is_frontface; - switch (llvmpipe->rasterizer->cull_mode) { + switch (cull_mode) { case PIPE_WINDING_NONE: setup->triangle = triangle_both; break; -- cgit v1.2.3 From d2e2b75633b5ac8eef20fd3c6846d871a6d7eb1a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 16:32:03 +0100 Subject: llvmpipe: rewrite llvmpipe_clear() --- src/gallium/drivers/llvmpipe/lp_clear.c | 36 +++++++-------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index bdcff94b9bf..9efb3d40839 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -33,12 +33,10 @@ #include "pipe/p_defines.h" -#include "util/u_pack_color.h" #include "lp_clear.h" #include "lp_context.h" -#include "lp_surface.h" +#include "lp_setup.h" #include "lp_state.h" -#include 
"lp_tile_cache.h" /** @@ -46,36 +44,16 @@ * No masking, no scissor (clear entire buffer). */ void -llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil) +llvmpipe_clear(struct pipe_context *pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - unsigned cv; - uint i; if (llvmpipe->no_rast) return; -#if 0 - llvmpipe_update_derived(llvmpipe); /* not needed?? */ -#endif - - if (buffers & PIPE_CLEAR_COLOR) { - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { - struct pipe_surface *ps = llvmpipe->framebuffer.cbufs[i]; - - util_pack_color(rgba, ps->format, &cv); - lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv); - } - llvmpipe->dirty_render_cache = TRUE; - } - - if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { - struct pipe_surface *ps = llvmpipe->framebuffer.zsbuf; - - cv = util_pack_z_stencil(ps->format, depth, stencil); - - /* non-cached surface */ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); - } + lp_setup_clear( llvmpipe->setup, buffers, rgba, depth, stencil ); } -- cgit v1.2.3 From 6b65685def525a8023ee936e82e53af2bc4e38b2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 16:33:12 +0100 Subject: llvmpipe: remove tex tile cache and related code --- src/gallium/drivers/llvmpipe/lp_context.c | 70 +- src/gallium/drivers/llvmpipe/lp_context.h | 21 +- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 3 - src/gallium/drivers/llvmpipe/lp_flush.c | 32 +- src/gallium/drivers/llvmpipe/lp_setup.h | 9 +- src/gallium/drivers/llvmpipe/lp_state.h | 6 - src/gallium/drivers/llvmpipe/lp_state_derived.c | 7 +- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 2 - src/gallium/drivers/llvmpipe/lp_state_surface.c | 41 +- src/gallium/drivers/llvmpipe/lp_tex_cache.c | 304 ---- src/gallium/drivers/llvmpipe/lp_tex_cache.h | 151 -- src/gallium/drivers/llvmpipe/lp_tex_sample.h | 3 - 
src/gallium/drivers/llvmpipe/lp_tex_sample_c.c | 1713 ----------------------- src/gallium/drivers/llvmpipe/lp_texture.c | 4 +- 14 files changed, 33 insertions(+), 2333 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_cache.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_cache.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 57e71f3e986..f087b653219 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -41,62 +41,12 @@ #include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" -#include "lp_tile_cache.h" -#include "lp_tex_cache.h" #include "lp_texture.h" #include "lp_winsys.h" #include "lp_query.h" -/** - * Map any drawing surfaces which aren't already mapped - */ -void -llvmpipe_map_transfers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - struct pipe_surface *zsbuf = lp->framebuffer.zsbuf; - unsigned i; - - for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { - lp_tile_cache_map_transfers(lp->cbuf_cache[i]); - } - - if(zsbuf) { - if(!lp->zsbuf_transfer) - lp->zsbuf_transfer = screen->get_tex_transfer(screen, zsbuf->texture, - zsbuf->face, zsbuf->level, zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, zsbuf->width, zsbuf->height); - if(lp->zsbuf_transfer && !lp->zsbuf_map) - lp->zsbuf_map = screen->transfer_map(screen, lp->zsbuf_transfer); - - } -} - - -/** - * Unmap any mapped drawing surfaces - */ -void -llvmpipe_unmap_transfers(struct llvmpipe_context *lp) -{ - uint i; - - for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { - lp_tile_cache_unmap_transfers(lp->cbuf_cache[i]); - } - - if(lp->zsbuf_transfer) { - struct pipe_screen *screen = lp->pipe.screen; - - if(lp->zsbuf_map) { - screen->transfer_unmap(screen, lp->zsbuf_transfer); - lp->zsbuf_map = NULL; - } - } -} static void llvmpipe_destroy( struct 
pipe_context *pipe ) @@ -107,14 +57,16 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) if (llvmpipe->draw) draw_destroy( llvmpipe->draw ); + if (llvmpipe->setup) + lp_setup_destroy( llvmpipe->setup ); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]); pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL); } + pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]); pipe_texture_reference(&llvmpipe->texture[i], NULL); } @@ -135,7 +87,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); unsigned i; - if(llvmpipe->dirty_render_cache) { + if (lp_setup_is_active(llvmpipe->setup)) { for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { if(llvmpipe->framebuffer.cbufs[i] && llvmpipe->framebuffer.cbufs[i]->texture == texture) @@ -226,21 +178,10 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe_init_query_funcs( llvmpipe ); llvmpipe_init_texture_funcs( llvmpipe ); - /* - * Alloc caches for accessing drawing surfaces and textures. 
- */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - llvmpipe->cbuf_cache[i] = lp_create_tile_cache( screen ); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen ); - - /* vertex shader samplers */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->tex_cache[i]; llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i]; } @@ -248,7 +189,6 @@ llvmpipe_create( struct pipe_screen *screen ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; - llvmpipe->tgsi.frag_samplers[i].cache = llvmpipe->tex_cache[i]; llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i]; } diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 0b77ae58d50..17e88975463 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -42,12 +42,10 @@ struct llvmpipe_vbuf_render; struct draw_context; struct draw_stage; -struct llvmpipe_tile_cache; -struct llvmpipe_tex_tile_cache; struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; - +struct lp_setup_context; struct llvmpipe_context { struct pipe_context pipe; /**< base class */ @@ -112,8 +110,6 @@ struct llvmpipe_context { /** Derived from scissor and surface bounds: */ struct pipe_scissor_state cliprect; - unsigned line_stipple_counter; - /** TGSI exec things */ struct { struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; @@ -122,6 +118,9 @@ struct llvmpipe_context { struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; + /** The tiling engine */ + struct lp_setup_context *setup; + /** The primitive drawing context */ struct 
draw_context *draw; @@ -129,18 +128,8 @@ struct llvmpipe_context { struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - boolean dirty_render_cache; - - struct llvmpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; - - /* TODO: we shouldn't be using external interfaces internally like this */ - struct pipe_transfer *zsbuf_transfer; - uint8_t *zsbuf_map; - unsigned tex_timestamp; - struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; - - unsigned no_rast : 1; + boolean no_rast; struct lp_jit_context jit_context; }; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 89772e62d31..2bffcdb3ba5 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -123,7 +123,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, if (lp->dirty) llvmpipe_update_derived( lp ); - llvmpipe_map_transfers(lp); llvmpipe_map_constant_buffers(lp); /* @@ -164,8 +163,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ llvmpipe_unmap_constant_buffers(lp); - - lp->dirty_render_cache = TRUE; return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index cd8381fe308..d0dd41f09c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -36,8 +36,6 @@ #include "lp_context.h" #include "lp_surface.h" #include "lp_state.h" -#include "lp_tile_cache.h" -#include "lp_tex_cache.h" #include "lp_winsys.h" @@ -47,40 +45,14 @@ llvmpipe_flush( struct pipe_context *pipe, struct pipe_fence_handle **fence ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - uint i; draw_flush(llvmpipe->draw); if (flags & PIPE_FLUSH_SWAPBUFFERS) { - /* If this is a swapbuffers, just flush color buffers. - * - * The zbuffer changes are not discarded, but held in the cache - * in the hope that a later clear will wipe them out. 
- */ - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) { - lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); - lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); - } - - /* Need this call for hardware buffers before swapbuffers. - * - * there should probably be another/different flush-type function - * that's called before swapbuffers because we don't always want - * to unmap surfaces when flushing. - */ - llvmpipe_unmap_transfers(llvmpipe); + lp_setup_flush( llvmpipe->setup, FALSE ); } else if (flags & PIPE_FLUSH_RENDER_CACHE) { - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) { - lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); - lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); - } - - /* FIXME: untile zsbuf! */ - - llvmpipe->dirty_render_cache = FALSE; + lp_setup_flush( llvmpipe->setup, TRUE ); } /* Enable to dump BMPs of the color/depth buffers each frame */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 2542faad36b..39e7b558c81 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -41,11 +41,18 @@ struct setup_context; struct setup_context * lp_setup_create( void ); +void +lp_setup_clear(struct pipe_context *pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil); + void lp_setup_triangle(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], - const float (*v1)[4]); + const float (*v2)[4]); void lp_setup_line(struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 7b26ce61a38..a9980d6f14a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -207,12 +207,6 @@ void llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); -void -llvmpipe_map_transfers(struct llvmpipe_context *lp); - -void 
-llvmpipe_unmap_transfers(struct llvmpipe_context *lp); - void llvmpipe_map_texture_surfaces(struct llvmpipe_context *lp); diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index fcd31136b75..c9439c7154c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -33,7 +33,6 @@ #include "draw/draw_private.h" #include "lp_context.h" #include "lp_screen.h" -#include "lp_tex_cache.h" #include "lp_state.h" @@ -215,7 +214,7 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->sampler[i]; llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; + llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; } /* fragment shader samplers */ @@ -225,10 +224,6 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; } - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->tex_cache[i] ); - } - llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index c69d90c723a..ae787801eb6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -37,7 +37,6 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_texture.h" -#include "lp_tex_cache.h" #include "draw/draw_context.h" @@ -97,7 +96,6 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, struct pipe_texture *tex = i < num ? 
texture[i] : NULL; pipe_texture_reference(&llvmpipe->texture[i], tex); - lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex); if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index c06ce8b75c1..06560335d70 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -31,16 +31,12 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_surface.h" -#include "lp_tile_cache.h" #include "draw/draw_context.h" /** - * XXX this might get moved someday * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. - * Here, we flush the old surfaces and update the tile cache to point to the new - * surfaces. */ void llvmpipe_set_framebuffer_state(struct pipe_context *pipe, @@ -48,38 +44,23 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, { struct llvmpipe_context *lp = llvmpipe_context(pipe); uint i; + boolean dirty = FALSE; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - /* check if changing cbuf */ if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { - /* flush old */ - lp_tile_cache_map_transfers(lp->cbuf_cache[i]); - lp_flush_tile_cache(lp->cbuf_cache[i]); - - /* assign new */ pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]); - - /* update cache */ - lp_tile_cache_set_surface(lp->cbuf_cache[i], fb->cbufs[i]); + dirty = TRUE; } } - lp->framebuffer.nr_cbufs = fb->nr_cbufs; + if (lp->framebuffer.nr_cbufs != fb->nr_cbufs) { + dirty = TRUE; + lp->framebuffer.nr_cbufs = fb->nr_cbufs; + } /* zbuf changing? 
*/ if (lp->framebuffer.zsbuf != fb->zsbuf) { - - if(lp->zsbuf_transfer) { - struct pipe_screen *screen = pipe->screen; - - if(lp->zsbuf_map) { - screen->transfer_unmap(screen, lp->zsbuf_transfer); - lp->zsbuf_map = NULL; - } - - screen->tex_transfer_destroy(lp->zsbuf_transfer); - lp->zsbuf_transfer = NULL; - } + dirty = TRUE; /* assign new */ pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf); @@ -100,8 +81,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } } - lp->framebuffer.width = fb->width; - lp->framebuffer.height = fb->height; - - lp->dirty |= LP_NEW_FRAMEBUFFER; + if (dirty) { + lp_setup_set_framebuffer( llvmpipe->setup, fb ); + lp->dirty |= LP_NEW_FRAMEBUFFER; + } } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c deleted file mode 100644 index 773e8482425..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ /dev/null @@ -1,304 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture tile caching. - * - * Author: - * Brian Paul - */ - -#include "pipe/p_inlines.h" -#include "util/u_memory.h" -#include "util/u_tile.h" -#include "util/u_format.h" -#include "lp_context.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_cache.h" - - - -/** - * Return the position in the cache for the tile that contains win pos (x,y). - * We currently use a direct mapped cache so this is like a hack key. - * At some point we should investige something more sophisticated, like - * a LRU replacement policy. - */ -#define CACHE_POS(x, y) \ - (((x) + (y) * 5) % NUM_ENTRIES) - - - -/** - * Is the tile at (x,y) in cleared state? - */ -static INLINE uint -is_clear_flag_set(const uint *bitvec, union tex_tile_address addr) -{ - int pos, bit; - pos = addr.bits.y * (MAX_TEX_WIDTH / TEX_TILE_SIZE) + addr.bits.x; - assert(pos / 32 < (MAX_TEX_WIDTH / TEX_TILE_SIZE) * (MAX_TEX_HEIGHT / TEX_TILE_SIZE) / 32); - bit = bitvec[pos / 32] & (1 << (pos & 31)); - return bit; -} - - -/** - * Mark the tile at (x,y) as not cleared. 
- */ -static INLINE void -clear_clear_flag(uint *bitvec, union tex_tile_address addr) -{ - int pos; - pos = addr.bits.y * (MAX_TEX_WIDTH / TEX_TILE_SIZE) + addr.bits.x; - assert(pos / 32 < (MAX_TEX_WIDTH / TEX_TILE_SIZE) * (MAX_TEX_HEIGHT / TEX_TILE_SIZE) / 32); - bitvec[pos / 32] &= ~(1 << (pos & 31)); -} - - -struct llvmpipe_tex_tile_cache * -lp_create_tex_tile_cache( struct pipe_screen *screen ) -{ - struct llvmpipe_tex_tile_cache *tc; - uint pos; - - tc = CALLOC_STRUCT( llvmpipe_tex_tile_cache ); - if (tc) { - tc->screen = screen; - for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].addr.bits.invalid = 1; - } - tc->last_tile = &tc->entries[0]; /* any tile */ - } - return tc; -} - - -void -lp_destroy_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc) -{ - struct pipe_screen *screen; - uint pos; - - for (pos = 0; pos < NUM_ENTRIES; pos++) { - /*assert(tc->entries[pos].x < 0);*/ - } - if (tc->transfer) { - screen = tc->transfer->texture->screen; - screen->tex_transfer_destroy(tc->transfer); - } - if (tc->tex_trans) { - screen = tc->tex_trans->texture->screen; - screen->tex_transfer_destroy(tc->tex_trans); - } - - FREE( tc ); -} - - -void -lp_tex_tile_cache_map_transfers(struct llvmpipe_tex_tile_cache *tc) -{ - if (tc->transfer && !tc->transfer_map) - tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); - - if (tc->tex_trans && !tc->tex_trans_map) - tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); -} - - -void -lp_tex_tile_cache_unmap_transfers(struct llvmpipe_tex_tile_cache *tc) -{ - if (tc->transfer_map) { - tc->screen->transfer_unmap(tc->screen, tc->transfer); - tc->transfer_map = NULL; - } - - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } -} - -void -lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) -{ - if (tc->texture) { - struct llvmpipe_texture *lpt = llvmpipe_texture(tc->texture); - if (lpt->timestamp != 
tc->timestamp) { - /* texture was modified, invalidate all cached tiles */ - uint i; - debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].addr.bits.invalid = 1; - } - - tc->timestamp = lpt->timestamp; - } - } -} - -/** - * Specify the texture to cache. - */ -void -lp_tex_tile_cache_set_texture(struct llvmpipe_tex_tile_cache *tc, - struct pipe_texture *texture) -{ - uint i; - - assert(!tc->transfer); - - if (tc->texture != texture) { - pipe_texture_reference(&tc->texture, texture); - - if (tc->tex_trans) { - struct pipe_screen *screen = tc->tex_trans->texture->screen; - - if (tc->tex_trans_map) { - screen->transfer_unmap(screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - /* mark as entries as invalid/empty */ - /* XXX we should try to avoid this when the teximage hasn't changed */ - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].addr.bits.invalid = 1; - } - - tc->tex_face = -1; /* any invalid value here */ - } -} - - -/** - * Given the texture face, level, zslice, x and y values, compute - * the cache entry position/index where we'd hope to find the - * cached texture tile. - * This is basically a direct-map cache. - * XXX There's probably lots of ways in which we can improve this. - */ -static INLINE uint -tex_cache_pos( union tex_tile_address addr ) -{ - uint entry = (addr.bits.x + - addr.bits.y * 9 + - addr.bits.z * 3 + - addr.bits.face + - addr.bits.level * 7); - - return entry % NUM_ENTRIES; -} - -/** - * Similar to lp_get_cached_tile() but for textures. - * Tiles are read-only and indexed with more params. 
- */ -const struct llvmpipe_cached_tex_tile * -lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, - union tex_tile_address addr ) -{ - struct pipe_screen *screen = tc->screen; - struct llvmpipe_cached_tex_tile *tile; - - tile = tc->entries + tex_cache_pos( addr ); - - if (addr.value != tile->addr.value) { - - /* cache miss. Most misses are because we've invaldiated the - * texture cache previously -- most commonly on binding a new - * texture. Currently we effectively flush the cache on texture - * bind. - */ -#if 0 - _debug_printf("miss at %u: x=%d y=%d z=%d face=%d level=%d\n" - " tile %u: x=%d y=%d z=%d face=%d level=%d\n", - pos, x/TEX_TILE_SIZE, y/TEX_TILE_SIZE, z, face, level, - pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level); -#endif - - /* check if we need to get a new transfer */ - if (!tc->tex_trans || - tc->tex_face != addr.bits.face || - tc->tex_level != addr.bits.level || - tc->tex_z != addr.bits.z) { - /* get new transfer (view into texture) */ - - if (tc->tex_trans) { - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - tc->tex_trans = - screen->get_tex_transfer(screen, tc->texture, - addr.bits.face, - addr.bits.level, - addr.bits.z, - PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); - - tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); - - tc->tex_face = addr.bits.face; - tc->tex_level = addr.bits.level; - tc->tex_z = addr.bits.z; - } - - { - unsigned x = addr.bits.x * TEX_TILE_SIZE; - unsigned y = addr.bits.y * TEX_TILE_SIZE; - unsigned w = TEX_TILE_SIZE; - unsigned h = TEX_TILE_SIZE; - - if (pipe_clip_tile(x, y, &w, &h, tc->tex_trans)) { - assert(0); - } - - util_format_read_4ub(tc->tex_trans->format, - (uint8_t *)tile->color, sizeof tile->color[0], - tc->tex_trans_map, tc->tex_trans->stride, - x, 
y, w, h); - } - - tile->addr = addr; - } - - tc->last_tile = tile; - return tile; -} diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h deleted file mode 100644 index 9fa6c368125..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h +++ /dev/null @@ -1,151 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef LP_TEX_CACHE_H -#define LP_TEX_CACHE_H - - -#include "pipe/p_compiler.h" - - -struct llvmpipe_context; -struct llvmpipe_tex_tile_cache; - - -/** - * Cache tile size (width and height). This needs to be a power of two. 
- */ -#define TEX_TILE_SIZE 64 - - -/* If we need to support > 4096, just expand this to be a 64 bit - * union, or consider tiling in Z as well. - */ -union tex_tile_address { - struct { - unsigned x:6; /* 4096 / TEX_TILE_SIZE */ - unsigned y:6; /* 4096 / TEX_TILE_SIZE */ - unsigned z:12; /* 4096 -- z not tiled */ - unsigned face:3; - unsigned level:4; - unsigned invalid:1; - } bits; - unsigned value; -}; - - -struct llvmpipe_cached_tex_tile -{ - union tex_tile_address addr; - uint8_t color[TEX_TILE_SIZE][TEX_TILE_SIZE][4]; -}; - -#define NUM_ENTRIES 50 - - -/** XXX move these */ -#define MAX_TEX_WIDTH 2048 -#define MAX_TEX_HEIGHT 2048 - - -struct llvmpipe_tex_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct pipe_texture *texture; /**< if caching a texture */ - unsigned timestamp; - - struct llvmpipe_cached_tex_tile entries[NUM_ENTRIES]; - - struct pipe_transfer *tex_trans; - void *tex_trans_map; - int tex_face, tex_level, tex_z; - - struct llvmpipe_cached_tex_tile *last_tile; /**< most recently retrieved tile */ -}; - - -extern struct llvmpipe_tex_tile_cache * -lp_create_tex_tile_cache( struct pipe_screen *screen ); - -extern void -lp_destroy_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc); - -extern void -lp_tex_tile_cache_map_transfers(struct llvmpipe_tex_tile_cache *tc); - -extern void -lp_tex_tile_cache_unmap_transfers(struct llvmpipe_tex_tile_cache *tc); - -extern void -lp_tex_tile_cache_set_texture(struct llvmpipe_tex_tile_cache *tc, - struct pipe_texture *texture); - -void -lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc); - -extern const struct llvmpipe_cached_tex_tile * -lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, - union tex_tile_address addr ); - -static INLINE const union tex_tile_address -tex_tile_address( unsigned x, - unsigned y, - unsigned z, - unsigned face, - unsigned level ) -{ - 
union tex_tile_address addr; - - addr.value = 0; - addr.bits.x = x / TEX_TILE_SIZE; - addr.bits.y = y / TEX_TILE_SIZE; - addr.bits.z = z; - addr.bits.face = face; - addr.bits.level = level; - - return addr; -} - -/* Quickly retrieve tile if it matches last lookup. - */ -static INLINE const struct llvmpipe_cached_tex_tile * -lp_get_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, - union tex_tile_address addr ) -{ - if (tc->last_tile->addr.value == addr.value) - return tc->last_tile; - - return lp_find_cached_tex_tile( tc, addr ); -} - - -#endif /* LP_TEX_CACHE_H */ - diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 9ad1bde9565..526ea100db6 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -34,7 +34,6 @@ #include "tgsi/tgsi_exec.h" -struct llvmpipe_tex_tile_cache; struct lp_sampler_static_state; @@ -55,8 +54,6 @@ struct lp_shader_sampler const struct pipe_texture *texture; const struct pipe_sampler_state *sampler; - - struct llvmpipe_tex_tile_cache *cache; }; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c deleted file mode 100644 index a1365a045f1..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ /dev/null @@ -1,1713 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture sampling - * - * Authors: - * Brian Paul - */ - -#include "lp_context.h" -#include "lp_quad.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_sample.h" -#include "lp_tex_cache.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - - -/* - * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes - * see 1-pixel bands of improperly weighted linear-filtered textures. - * The tests/texwrap.c demo is a good test. - * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. - * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). 
- */ -#define FRAC(f) ((f) - util_ifloor(f)) - - -/** - * Linear interpolation macro - */ -static INLINE float -lerp(float a, float v0, float v1) -{ - return v0 + a * (v1 - v0); -} - - -/** - * Do 2D/biliner interpolation of float values. - * v00, v10, v01 and v11 are typically four texture samples in a square/box. - * a and b are the horizontal and vertical interpolants. - * It's important that this function is inlined when compiled with - * optimization! If we find that's not true on some systems, convert - * to a macro. - */ -static INLINE float -lerp_2d(float a, float b, - float v00, float v10, float v01, float v11) -{ - const float temp0 = lerp(a, v00, v10); - const float temp1 = lerp(a, v01, v11); - return lerp(b, temp0, temp1); -} - - -/** - * As above, but 3D interpolation of 8 values. - */ -static INLINE float -lerp_3d(float a, float b, float c, - float v000, float v100, float v010, float v110, - float v001, float v101, float v011, float v111) -{ - const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); - const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); - return lerp(c, temp0, temp1); -} - - - -/** - * If A is a signed integer, A % B doesn't give the right value for A < 0 - * (in terms of texture repeat). Just casting to unsigned fixes that. - */ -#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) - - -/** - * Apply texture coord wrapping mode and return integer texture indexes - * for a vector of four texcoords (S or T or P). 
- * \param wrapMode PIPE_TEX_WRAP_x - * \param s the incoming texcoords - * \param size the texture image size - * \param icoord returns the integer texcoords - * \return integer texture index - */ -static INLINE void -nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - /* s limited to [0,1) */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); - } - return; - case PIPE_TEX_WRAP_CLAMP: - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= 0.0F) - icoord[ch] = 0; - else if (s[ch] >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] < min) - icoord[ch] = 0; - else if (s[ch] > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= min) - icoord[ch] = -1; - else if (s[ch] >= max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP: 
- for (ch = 0; ch < 4; ch++) { - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - const float u = fabsf(s[ch]); - if (u <= 0.0F) - icoord[ch] = 0; - else if (u >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = -1; - else if (u > max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - default: - assert(0); - } -} - - -/** - * Used to compute texel locations for linear sampling for four texcoords. 
- * \param wrapMode PIPE_TEX_WRAP_x - * \param s the texcoords - * \param size the texture image size - * \param icoord0 returns first texture indexes - * \param icoord1 returns second texture indexes (usually icoord0 + 1) - * \param w returns blend factor/weight between texture indexes - * \param icoord returns the computed integer texture coords - */ -static INLINE void -linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - for (ch = 0; ch < 4; ch++) { - float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], min, max); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - u = u * size - 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; 
- w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords - * Only a subset of wrap modes supported. - */ -static INLINE void -nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch]); - icoord[ch]= CLAMP(i, 0, (int) size-1); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); - } - return; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords. - * Only a subset of wrap modes supported. 
- */ -static INLINE void -linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord1[ch] > (int) size - 1) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break; - default: - assert(0); - } -} - - -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) -{ - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = 
PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } - } - - *newS = ( sc / ma + 1.0F ) * 0.5F; - *newT = ( tc / ma + 1.0F ) * 0.5F; - - return face; -} - - -/** - * Examine the quad's texture coordinates to compute the partial - * derivatives w.r.t X and Y, then compute lambda (level of detail). - * - * This is only done for fragment shaders, not vertex shaders. - */ -static float -compute_lambda(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - float rho, lambda; - - if (samp->processor == TGSI_PROCESSOR_VERTEX) - return lodbias; - - assert(sampler->normalized_coords); - - assert(s); - { - float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; - float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width[0]; - } - if (t) { - float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; - float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; - float max; - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height[0]; - rho = MAX2(rho, max); - } - if (p) { - float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; - float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; - float max; - dpdx = fabsf(dpdx); - dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth[0]; - rho = MAX2(rho, max); - } - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; -} - - -/** - * Do several things here: - * 1. Compute lambda from the texcoords, if needed - * 2. Determine if we're minifying or magnifying - * 3. If minifying, choose mipmap levels - * 4. 
Return image filter to use within mipmap images - * \param level0 Returns first mipmap level to sample from - * \param level1 Returns second mipmap level to sample from - * \param levelBlend Returns blend factor between levels, in [0,1] - * \param imgFilter Returns either the min or mag filter, depending on lambda - */ -static void -choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - unsigned *level0, unsigned *level1, float *levelBlend, - unsigned *imgFilter) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->min_img_filter != sampler->mag_img_filter) { - /* non-mipmapped texture, but still need to determine if doing - * minification or magnification. 
- */ - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - if (lambda <= 0.0) { - *imgFilter = sampler->mag_img_filter; - } - else { - *imgFilter = sampler->min_img_filter; - } - } - else { - *imgFilter = sampler->mag_img_filter; - } - } - else { - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - - if (lambda <= 0.0) { /* XXX threshold depends on the filter */ - /* magnifying */ - *imgFilter = sampler->mag_img_filter; - *level0 = *level1 = 0; - } - else { - /* minifying */ - *imgFilter = sampler->min_img_filter; - - /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - /* Nearest mipmap level */ - const int lvl = (int) (lambda + 0.5); - *level0 = - *level1 = CLAMP(lvl, 0, (int) texture->last_level); - } - else { - /* Linear interpolation between mipmap levels */ - const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); - *levelBlend = FRAC(lambda); /* blending weight between levels */ - } - } - } -} - - -/** - * Get a texel from a texture, using the texture tile cache. - * - * \param face the cube face in 0..5 - * \param level the mipmap level - * \param x the x coord of texel within 2D image - * \param y the y coord of texel within 2D image - * \param z which slice of a 3D texture - * \param rgba the quad to put the texel/color into - * \param j which element of the rgba quad to write to - * - * XXX maybe move this into lp_tile_cache.c and merge with the - * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... 
- */ -static void -get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, - const uint8_t *out[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - out[0] = &tile->color[y ][x ][0]; - out[1] = &tile->color[y ][x+1][0]; - out[2] = &tile->color[y+1][x ][0]; - out[3] = &tile->color[y+1][x+1][0]; -} - -static INLINE const uint8_t * -get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - return &tile->color[y][x][0]; -} - - -static void -get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, - int x0, int y0, - int x1, int y1, - const uint8_t *out[4]) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - unsigned tx = (i & 1) ? x1 : x0; - unsigned ty = (i >> 1) ? 
y1 : y0; - - out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); - } -} - -static void -get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { - rgba[0][j] = sampler->border_color[0]; - rgba[1][j] = sampler->border_color[1]; - rgba[2][j] = sampler->border_color[2]; - rgba[3][j] = sampler->border_color[3]; - } - else { - const unsigned tx = x % TEX_TILE_SIZE; - const unsigned ty = y % TEX_TILE_SIZE; - const struct llvmpipe_cached_tex_tile *tile; - - tile = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, z, face, level)); - - rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]); - rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]); - rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]); - rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]); - if (0) - { - debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(texture->format)); - } - } -} - - -/** - * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' - * When we sampled the depth texture, the depth value was put into all - * RGBA channels. We look at the red channel here. 
- * \param rgba quad of (depth) texel values - * \param p texture 'P' components for four pixels in quad - * \param j which pixel in the quad to test [0..3] - */ -static INLINE void -shadow_compare(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE], - uint j) -{ - int k; - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k = p[j] < rgba[0][j]; - break; - case PIPE_FUNC_LEQUAL: - k = p[j] <= rgba[0][j]; - break; - case PIPE_FUNC_GREATER: - k = p[j] > rgba[0][j]; - break; - case PIPE_FUNC_GEQUAL: - k = p[j] >= rgba[0][j]; - break; - case PIPE_FUNC_EQUAL: - k = p[j] == rgba[0][j]; - break; - case PIPE_FUNC_NOTEQUAL: - k = p[j] != rgba[0][j]; - break; - case PIPE_FUNC_ALWAYS: - k = 1; - break; - case PIPE_FUNC_NEVER: - k = 0; - break; - default: - k = 0; - assert(0); - break; - } - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; - rgba[3][j] = 1.0F; -} - - -/** - * As above, but do four z/texture comparisons. - */ -static INLINE void -shadow_compare4(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE]) -{ - int j, k0, k1, k2, k3; - float val; - - /* compare four texcoords vs. 
four texture samples */ - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; - break; - case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; - break; - case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; - break; - case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; - break; - case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; - break; - case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; - break; - case PIPE_FUNC_ALWAYS: - k0 = k1 = k2 = k3 = 1; - break; - case PIPE_FUNC_NEVER: - k0 = k1 = k2 = k3 = 0; - break; - default: - k0 = k1 = k2 = k3 = 0; - assert(0); - break; - } - - /* convert four pass/fail values to an intensity in [0,1] */ - val = 0.25F * (k0 + k1 + k2 + k3); - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for (j = 0; j < 4; j++) { - rgba[0][j] = rgba[1][j] = rgba[2][j] = val; - rgba[3][j] = 1.0F; - } -} - - - -static void -lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */ - unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */ - - for (j 
= 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - float xw = u - (float)uflr; - float yw = v - (float)vflr; - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *tx[4]; - - - /* Can we fetch all four at once: - */ - if (x0 < xmax && y0 < ymax) - { - get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); - } - else - { - unsigned x1 = (x0 + 1) & (xpot - 1); - unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level, - x0, y0, x1, y1, tx); - } - - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw, yw, - ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]), - ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c])); - } - } -} - - -static void -lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; 
- unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int x0, y0; - const uint8_t *out; - - x0 = util_ifloor(u); - if (x0 < 0) - x0 = 0; - else if (x0 > xpot - 1) - x0 = xpot - 1; - - y0 = util_ifloor(v); - if (y0 < 0) - y0 = 0; - else if (y0 > ypot - 1) - y0 = ypot - 1; - - out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - int level0; - float lambda; - - lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - level0 = (int)lambda; - - if (lambda < 0.0) { - samp->level = 0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else if (level0 >= texture->last_level) { - samp->level = texture->last_level; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else { - float levelBlend = lambda - level0; - float rgba0[4][4]; - float rgba1[4][4]; - int c,j; - - samp->level = level0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba0 ); - - samp->level = level0+1; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba1 ); - - for (j = 0; j < QUAD_SIZE; j++) { - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); - } - } - } -} - -/** - * Common code for sampling 1D/2D/cube textures. - * Could probably extend for 3D... 
- */ -static void -lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const unsigned faces[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = texture->width[level0]; - height = texture->height[level0]; - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, - rgba2, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(sampler, rgba2, p, j); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, 
tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - - /* XXX: This is incorrect -- will often end up with (x0 - * == x1 && y0 == y1), meaning that we fetch the same - * texel four times and linearly interpolate between - * identical values. The correct approach would be to - * call linear_texcoord again for the second level. - */ - x0[j] /= 2; - y0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static INLINE void -lp_get_samples_1d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - static const float tzero[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, tzero, NULL, - lodbias, rgba, faces); -} - - -static INLINE void 
-lp_get_samples_2d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, t, p, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - /* get/map pipe_surfaces corresponding to 3D tex slices */ - unsigned level0, level1, j, imgFilter; - int width, height, depth; - float levelBlend; - const uint face = 0; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; - - assert(width > 0); - assert(height > 0); - assert(depth > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4], z[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - nearest_texcoord_4(sampler->wrap_r, p, depth, z); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - z[j] /= 2; - get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], x1[4], y0[4], 
y1[4], z0[4], z1[4]; - float xw[4], yw[4], zw[4]; /* interpolation weights */ - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - float tx0[4][4], tx1[4][4]; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - z0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - z1[j] /= 2; - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], 
tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static void -lp_get_samples_cube(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - unsigned faces[QUAD_SIZE], j; - float ssss[4], tttt[4]; - for (j = 0; j < QUAD_SIZE; j++) { - faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); - } - lp_get_samples_2d_common(sampler, ssss, tttt, NULL, - lodbias, rgba, faces); -} - - -static void -lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - const uint face = 0; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - /* texture RECTS cannot be mipmapped */ - assert(level0 == level1); - - width = texture->width[level0]; - height = texture->height[level0]; - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); - nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], 
x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - } - } - break; - default: - assert(0); - } -} - - -/** - * Error condition handler - */ -static INLINE void -lp_get_samples_null(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - int i,j; - - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - rgba[i][j] = 1.0; -} - -/** - * Called via tgsi_sampler::get_samples() when using a sampler for the - * first time. Determine the actual sampler function, link it in and - * call it. - */ -void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - /* Default to the 'undefined' case: - */ - tgsi_sampler->get_samples = lp_get_samples_null; - - if (!texture) { - assert(0); /* is this legal?? 
*/ - goto out; - } - - if (!sampler->normalized_coords) { - assert (texture->target == PIPE_TEXTURE_2D); - tgsi_sampler->get_samples = lp_get_samples_rect; - goto out; - } - - switch (texture->target) { - case PIPE_TEXTURE_1D: - tgsi_sampler->get_samples = lp_get_samples_1d; - break; - case PIPE_TEXTURE_2D: - tgsi_sampler->get_samples = lp_get_samples_2d; - break; - case PIPE_TEXTURE_3D: - tgsi_sampler->get_samples = lp_get_samples_3d; - break; - case PIPE_TEXTURE_CUBE: - tgsi_sampler->get_samples = lp_get_samples_cube; - break; - default: - assert(0); - break; - } - - /* Do this elsewhere: - */ - samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); - samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); - - /* Try to hook in a faster sampler. Ultimately we'll have to - * code-generate these. Luckily most of this looks like it is - * orthogonal state within the sampler. - */ - if (texture->target == PIPE_TEXTURE_2D && - sampler->min_img_filter == sampler->mag_img_filter && - sampler->wrap_s == sampler->wrap_t && - sampler->compare_mode == FALSE && - sampler->normalized_coords) - { - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - samp->level = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT; - break; - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT; - break; - default: - break; - } - } - else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT; - break; - default: - break; - } - } - } - else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case 
PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT; - break; - default: - break; - } - } - } - } - else if (0) { - _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", - texture->target, PIPE_TEXTURE_2D, - sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, - sampler->min_img_filter, sampler->mag_img_filter, - sampler->wrap_s, sampler->wrap_t, - sampler->compare_mode, FALSE, - sampler->normalized_coords, TRUE); - } - -out: - tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); -} - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -#include "lp_bld_type.h" -#include "lp_bld_intr.h" -#include "lp_bld_tgsi.h" - - -struct lp_c_sampler_soa -{ - struct lp_build_sampler_soa base; - - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -static void -lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) -{ - FREE(sampler); -} - - -static void -lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, - LLVMBuilderRef builder, - struct lp_type type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - 
LLVMValueRef *texel) -{ - struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!sampler->samplers_ptr) - sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); - - if(!sampler->store_ptr) - sampler->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = sampler->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = sampler->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr) -{ - struct lp_c_sampler_soa *sampler; - - sampler = CALLOC_STRUCT(lp_c_sampler_soa); - if(!sampler) - return NULL; - - sampler->base.destroy = lp_c_sampler_soa_destroy; - sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; - sampler->context_ptr = context_ptr; - - return &sampler->base; -} - diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 08f0950d475..1682e37354a 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -40,7 +40,6 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_texture.h" -#include "lp_tex_cache.h" #include "lp_screen.h" #include "lp_winsys.h" @@ -241,7 +240,7 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, if (ps->usage 
& (PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE)) { - /* Mark the surface as dirty. The tile cache will look for this. */ + /* Mark the surface as dirty. */ lpt->timestamp++; llvmpipe_screen(screen)->timestamp++; } @@ -368,7 +367,6 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. - * In llvmpipe, that would mean flushing the texture cache. */ screen->timestamp++; } -- cgit v1.2.3 From ee3383bc5ab81ff12d9faa675c1c38683300ce68 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 17:06:03 +0100 Subject: llvmpipe: Update more copyright headers. --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 27 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_context.h | 5 +++-- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 7eced38d672..b819519553f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef LP_RAST_PRIV_H #define LP_RAST_PRIV_H diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 91540d6751e..eeee7159d93 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,12 +18,13 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* **************************************************************************/ + #ifndef LP_SETUP_CONTEXT_H #define LP_SETUP_CONTEXT_H -- cgit v1.2.3 From ce7ac8e7439fba74fc1ee368559dd520a2d1eabe Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 17:06:32 +0100 Subject: llvmpipe: Update lp_setup_clear invocation. --- src/gallium/drivers/llvmpipe/lp_clear.c | 2 +- src/gallium/drivers/llvmpipe/lp_setup.h | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 9efb3d40839..4bae44e2ea2 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -55,5 +55,5 @@ llvmpipe_clear(struct pipe_context *pipe, if (llvmpipe->no_rast) return; - lp_setup_clear( llvmpipe->setup, buffers, rgba, depth, stencil ); + lp_setup_clear( llvmpipe->setup, rgba, depth, stencil, buffers ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 39e7b558c81..6d741f72712 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -27,6 +27,7 @@ #ifndef LP_SETUP_H #define LP_SETUP_H +#include "pipe/p_compiler.h" enum lp_interp { LP_INTERP_CONSTANT, @@ -42,11 +43,11 @@ struct setup_context * lp_setup_create( void ); void -lp_setup_clear(struct pipe_context *pipe, - unsigned buffers, - const float *rgba, - double depth, - unsigned stencil); +lp_setup_clear(struct setup_context *setup, + const float *clear_color, + double clear_depth, + unsigned clear_stencil, + unsigned flags); void lp_setup_triangle(struct setup_context *setup, -- cgit v1.2.3 From 5974b80380de1a2fcaf71c638a8a11973379529d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 17:06:49 +0100 Subject: llvmpipe: Remove dead files from SConscript. 
--- src/gallium/drivers/llvmpipe/SConscript | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5e0fadc247c..b39bc76da0f 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -61,8 +61,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', - 'lp_tex_cache.c', - 'lp_tex_sample_c.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_soa.c', -- cgit v1.2.3 From 921584181eb2f3b2849d150295dfce1dae25dd11 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 17:26:13 +0100 Subject: llvmpipe: Fix up lp_rast_shade_quads. --- src/gallium/drivers/llvmpipe/lp_jit.h | 4 ++-- src/gallium/drivers/llvmpipe/lp_rast.c | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 58f716ede29..643e85be200 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -108,13 +108,13 @@ struct lp_jit_context typedef void -(*lp_jit_frag_func)(struct lp_jit_context *context, +(*lp_jit_frag_func)(const struct lp_jit_context *context, uint32_t x, uint32_t y, const void *a0, const void *dadx, const void *dady, - uint32_t *mask, + const uint32_t *mask, void *color, void *depth); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index df48ccce81d..e3d1cd56e0a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -30,6 +30,9 @@ #include "lp_state.h" #include "lp_quad.h" #include "lp_rast.h" +#include "lp_rast_priv.h" +#include "lp_tile_soa.h" +#include "lp_bld_debug.h" struct lp_rasterizer *lp_rast_create( void ) @@ -137,7 +140,6 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, struct quad_header **quads, unsigned nr ) { - 
struct lp_fragment_shader *fs = llvmpipe->fs; struct quad_header *quad = quads[0]; const unsigned x = quad->input.x0; const unsigned y = quad->input.y0; @@ -167,7 +169,7 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* depth buffer */ assert((x % 2) == 0); assert((y % 2) == 0); - depth = (uint8_t)*tile->depth + y*TILE_SIZE*4 + 2*x*4; + depth = (uint8_t *)tile->depth + y*TILE_SIZE*4 + 2*x*4; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(mask, 16)); @@ -177,14 +179,14 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, assert(lp_check_alignment(state->jc.blend_color, 16)); /* run shader */ - state->jit_function( &state->jc, - x, y, - quad->coef->a0, - quad->coef->dadx, - quad->coef->dady, - &mask[0][0], - color, - depth); + state->shader( &state->jc, + x, y, + quad->coef->a0, + quad->coef->dadx, + quad->coef->dady, + &mask[0][0], + color, + depth); } -- cgit v1.2.3 From d0c918b87a9fb0e86d6b3efedf3ef505e04c527f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 17:20:40 +0100 Subject: llvmpipe: remove some old sampler support structs --- src/gallium/drivers/llvmpipe/lp_context.c | 40 +++++-------------------- src/gallium/drivers/llvmpipe/lp_context.h | 12 ++------ src/gallium/drivers/llvmpipe/lp_jit.c | 2 +- src/gallium/drivers/llvmpipe/lp_jit.h | 2 +- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 11 +++---- src/gallium/drivers/llvmpipe/lp_rast.c | 44 +++++++++++++--------------- src/gallium/drivers/llvmpipe/lp_setup.h | 13 +++++--- src/gallium/drivers/llvmpipe/lp_tex_sample.h | 28 ------------------ 8 files changed, 45 insertions(+), 107 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index f087b653219..7f7b04412c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -44,6 +44,7 @@ #include "lp_texture.h" #include "lp_winsys.h" 
#include "lp_query.h" +#include "lp_setup.h" @@ -85,20 +86,8 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, unsigned face, unsigned level) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); - unsigned i; - if (lp_setup_is_active(llvmpipe->setup)) { - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { - if(llvmpipe->framebuffer.cbufs[i] && - llvmpipe->framebuffer.cbufs[i]->texture == texture) - return PIPE_REFERENCED_FOR_WRITE; - } - if(llvmpipe->framebuffer.zsbuf && - llvmpipe->framebuffer.zsbuf->texture == texture) - return PIPE_REFERENCED_FOR_WRITE; - } - - return PIPE_UNREFERENCED; + return lp_setup_is_texture_referenced(llvmpipe->setup, texture); } static unsigned int @@ -112,7 +101,6 @@ struct pipe_context * llvmpipe_create( struct pipe_screen *screen ) { struct llvmpipe_context *llvmpipe; - uint i; llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16); if (!llvmpipe) @@ -178,20 +166,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe_init_query_funcs( llvmpipe ); llvmpipe_init_texture_funcs( llvmpipe ); - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i]; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; - llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i]; - } - /* * Create drawing context and plug our rendering stage into it. 
*/ @@ -199,14 +173,16 @@ llvmpipe_create( struct pipe_screen *screen ) if (!llvmpipe->draw) goto fail; - draw_texture_samplers(llvmpipe->draw, - PIPE_MAX_SAMPLERS, - (struct tgsi_sampler **) - llvmpipe->tgsi.vert_samplers_list); + /* FIXME: vertex sampler state + */ if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; + llvmpipe->setup = lp_setup_create(); + if (!llvmpipe->setup) + goto fail; + llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); if (!llvmpipe->vbuf_backend) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 17e88975463..852f7a1d05c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -45,7 +45,7 @@ struct draw_stage; struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; -struct lp_setup_context; +struct setup_context; struct llvmpipe_context { struct pipe_context pipe; /**< base class */ @@ -110,16 +110,8 @@ struct llvmpipe_context { /** Derived from scissor and surface bounds: */ struct pipe_scissor_state cliprect; - /** TGSI exec things */ - struct { - struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; - } tgsi; - /** The tiling engine */ - struct lp_setup_context *setup; + struct setup_context *setup; /** The primitive drawing context */ struct draw_context *draw; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 1126bf90b96..a03eb874acb 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -91,7 +91,7 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, screen->target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct 
lp_jit_context, samplers, + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, dummy, screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, screen->target, context_type, 2); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 643e85be200..207dfbfde10 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -78,7 +78,7 @@ struct lp_jit_context { const float *constants; - struct tgsi_sampler **samplers; + void *dummy; /* remove me */ float alpha_ref_value; diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index 6c51d40a8f4..925e6f8b3bd 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -136,9 +136,8 @@ static boolean lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - struct setup_context *setup_ctx = cvbr->setup; - llvmpipe_update_state( setup_ctx->llvmpipe ); + llvmpipe_update_derived( cvbr->llvmpipe ); cvbr->llvmpipe->reduced_prim = u_reduced_prim(prim); cvbr->prim = prim; @@ -524,9 +523,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void lp_vbuf_destroy(struct vbuf_render *vbr) { - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - lp_setup_destroy_context(cvbr->setup); - FREE(cvbr); + FREE(vbr); } @@ -539,6 +536,7 @@ lp_create_vbuf_backend(struct llvmpipe_context *lp) struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); assert(lp->draw); + assert(lp->setup); cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; @@ -555,8 +553,7 @@ lp_create_vbuf_backend(struct llvmpipe_context *lp) cvbr->base.destroy = lp_vbuf_destroy; cvbr->llvmpipe = lp; - - cvbr->setup = lp_setup_create_context(cvbr->llvmpipe); + cvbr->setup = lp->setup; return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c 
b/src/gallium/drivers/llvmpipe/lp_rast.c index e3d1cd56e0a..498879e4cf9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -40,7 +40,7 @@ struct lp_rasterizer *lp_rast_create( void ) return CALLOC_STRUCT(lp_rasterizer); } -void lp_rast_bind_surfaces( struct lp_rasterizer *, +void lp_rast_bind_surfaces( struct lp_rasterizer *rast, struct pipe_surface *color, struct pipe_surface *zstencil, const float *clear_color, @@ -49,11 +49,9 @@ void lp_rast_bind_surfaces( struct lp_rasterizer *, { pipe_surface_reference(&rast->state.color, color); pipe_surface_reference(&rast->state.depth, depth); - rast->state.clear_color = util_pack_8888(clear_color); - rast->state.clear_depth = clear_depth * 0xffffffff; - rast->state.clear_stencil = clear_stencil; } + /* Begining of each tile: */ void lp_rast_start_tile( struct lp_rasterizer *, @@ -64,9 +62,10 @@ void lp_rast_start_tile( struct lp_rasterizer *, rast->y = y; } -void lp_rast_clear_color( struct lp_rasterizer *rast ) +void lp_rast_clear_color( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg ) { - const unsigned clear_color = rast->state.clear_color; + const unsigned clear_color = arg->clear.clear_color; unsigned i, j; for (i = 0; i < TILESIZE; i++) @@ -74,9 +73,10 @@ void lp_rast_clear_color( struct lp_rasterizer *rast ) rast->tile[i][j] = clear_color; } -void lp_rast_clear_depth( struct lp_rasterizer *rast ) +void lp_rast_clear_zstencil( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg) { - const unsigned clear_depth = rast->state.clear_depth; + const unsigned clear_color = arg->clear.clear_zstencil; unsigned i, j; for (i = 0; i < TILESIZE; i++) @@ -84,19 +84,15 @@ void lp_rast_clear_depth( struct lp_rasterizer *rast ) rast->tile[i][j] = clear_depth; } -void lp_rast_clear_stencil( struct lp_rasterizer *rast ) -{ - const unsigned clear_stencil = rast->state.clear_stencil; - - memset(rast->tile.stencil, clear_stencil, sizeof rast->tile.stencil ); -} 
-void lp_rast_load_color( struct lp_rasterizer *rast ) +void lp_rast_load_color( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg) { /* call u_tile func to load colors from surface */ } -void lp_rast_load_zstencil( struct lp_rasterizer *rast ) +void lp_rast_load_zstencil( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg ) { /* call u_tile func to load depth (and stencil?) from surface */ } @@ -104,15 +100,15 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast ) /* Within a tile: */ void lp_rast_set_state( struct lp_rasterizer *rast, - const struct lp_rast_state *state ) + const union lp_rast_cmd_arg *arg ) { - rast->shader_state = state; - lp->quad.first->begin( lp->quad.first ); + rast->shader_state = arg->state; } void lp_rast_shade_tile( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg *arg ) const struct lp_rast_shader_inputs *inputs ) { /* Set up the silly quad coef pointers @@ -193,14 +189,14 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* End of tile: */ -void lp_rast_store_color( struct lp_rasterizer *rast ) +void lp_rast_end_tile( struct lp_rasterizer *rast, + boolean write_depth ) { /* call u_tile func to store colors to surface */ -} -void lp_rast_store_zstencil( struct lp_rasterizer *rast ) -{ - /* call u_tile func to store depth/stencil to surface */ + if (write_depth) { + /* call u_tile func to store depth/stencil to surface */ + } } /* Shutdown: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 6d741f72712..5151a174f26 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -50,10 +50,10 @@ lp_setup_clear(struct setup_context *setup, unsigned flags); void -lp_setup_triangle(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]); +lp_setup_tri(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]); void 
lp_setup_line(struct setup_context *setup, @@ -74,6 +74,11 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const enum lp_interp *interp, unsigned nr ); +boolean +lp_setup_is_texture_referenced( struct setup_context *setup, + const struct pipe_texture *texture ); + + void lp_setup_destroy( struct setup_context *setup ); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 526ea100db6..dfc9c0e6f04 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -37,34 +37,6 @@ struct lp_sampler_static_state; -/** - * Subclass of tgsi_sampler - */ -struct lp_shader_sampler -{ - struct tgsi_sampler base; /**< base class */ - - unsigned processor; - - /* For lp_get_samples_2d_linear_POT: - */ - unsigned xpot; - unsigned ypot; - unsigned level; - - const struct pipe_texture *texture; - const struct pipe_sampler_state *sampler; -}; - - - -static INLINE struct lp_shader_sampler * -lp_shader_sampler(const struct tgsi_sampler *sampler) -{ - return (struct lp_shader_sampler *) sampler; -} - - extern void lp_get_samples(struct tgsi_sampler *tgsi_sampler, -- cgit v1.2.3 From 1caa26202c3bcc41ea5829b646128088e14d5dfd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 17:52:35 +0100 Subject: llvmpipe: start cleaning up --- src/gallium/drivers/llvmpipe/SConscript | 4 +-- src/gallium/drivers/llvmpipe/lp_rast.h | 12 ++++++-- src/gallium/drivers/llvmpipe/lp_setup.c | 41 ++++++++++++++----------- src/gallium/drivers/llvmpipe/lp_setup.h | 1 + src/gallium/drivers/llvmpipe/lp_setup_context.h | 26 ++++++++++------ 5 files changed, 52 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index b39bc76da0f..f6945535cad 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -47,8 +47,6 @@ llvmpipe = env.ConvenienceLibrary( 
'lp_jit.c', 'lp_prim_vbuf.c', 'lp_query.c', - 'lp_rast.c', - 'lp_rast_tri.c', 'lp_setup.c', 'lp_screen.c', 'lp_state_blend.c', @@ -61,6 +59,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', + 'lp_rast.c', + 'lp_rast_tri.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 33a6065b89c..f40208bbda9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -28,6 +28,8 @@ #ifndef LP_RAST_H #define LP_RAST_H +#include "lp_jit.h" + /* Initially create and program a single rasterizer directly. Later * will want multiple of these, one or two per core. At that stage * will probably pass command buffers into the rasterizers rather than @@ -35,6 +37,9 @@ */ struct lp_rasterizer; +#define TILESIZE 64 + + struct lp_rast_state { /* State for the shader: */ @@ -55,10 +60,11 @@ struct lp_rast_shader_inputs { */ const struct lp_rast_state *state; - /* Attribute interpolation: + /* Attribute interpolation: FIXME: reduce memory waste! */ - struct tgsi_interp_coef position_coef; - struct tgsi_interp_coef *coef; + float a0[PIPE_MAX_ATTRIBS][4]; + float dadx[PIPE_MAX_ATTRIBS][4]; + float dady[PIPE_MAX_ATTRIBS][4]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 514366b71f0..43a4f5f0297 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -32,7 +32,7 @@ * lp_setup_flush(). 
*/ -#include "lp_setup.h" +#include "lp_setup_context.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -56,31 +56,33 @@ void lp_setup_new_data_block( struct data_block_list *list ) static void reset_context( struct setup_context *setup ) { + unsigned i, j; + for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - struct cmd_block_list *list = scene->tile[i][j]; + struct cmd_block_list *list = &setup->tile[i][j]; struct cmd_block *block; struct cmd_block *tmp; - for (block = list->first; block != list->tail; block = tmp) { + for (block = list->head; block != list->tail; block = tmp) { tmp = block->next; FREE(block); } - list->first = list->tail; + list->head = list->tail; } } { - struct data_block_list *list = &scene->data; + struct data_block_list *list = &setup->data; struct data_block *block, *tmp; - for (block = list->first; block != list->tail; block = tmp) { + for (block = list->head; block != list->tail; block = tmp) { tmp = block->next; FREE(block); } - list->first = list->tail; + list->head = list->tail; } } @@ -90,39 +92,42 @@ static void reset_context( struct setup_context *setup ) /* Add a command to all active bins. 
*/ static void bin_everywhere( struct setup_context *setup, - bin_cmd cmd, + lp_rast_cmd cmd, const union lp_rast_cmd_arg *arg ) { unsigned i, j; for (i = 0; i < setup->tiles_x; i++) for (j = 0; j < setup->tiles_y; j++) - bin_cmd( setup, &setup->tile[i][j], cmd, arg ); + bin_cmd( &setup->tile[i][j], cmd, arg ); } static void rasterize_bins( struct setup_context *setup, - struct lp_rast *rast, boolean write_depth ) { + struct lp_rasterizer *rast = setup->rast; + struct cmd_block *block; + unsigned i,j,k; + lp_rast_bind_color( rast, - scene->fb.color, + setup->fb.color, TRUE ); /* WRITE */ lp_rast_bind_depth( rast, - scene->fb.depth, + setup->fb.zstencil, write_depth ); /* WRITE */ - for (i = 0; i < scene->tiles_x; i++) { - for (j = 0; j < scene->tiles_y; j++) { + for (i = 0; i < setup->tiles_x; i++) { + for (j = 0; j < setup->tiles_y; j++) { lp_rast_start_tile( rast, i * TILESIZE, j * TILESIZE ); - for (block = scene->tile[i][j].first; block; block = block->next) { - for (k = 0; k < block->nr_cmds; k++) { - block->cmd[k].func( rast, block->cmd[k].arg ); + for (block = setup->tile[i][j].head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + block->cmd[k]( rast, block->arg[k] ); } } @@ -130,7 +135,7 @@ rasterize_bins( struct setup_context *setup, } } - lp_setup_free_data( setup ); + reset_context( setup ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 5151a174f26..6f560f5f931 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -37,6 +37,7 @@ enum lp_interp { LP_INTERP_FACING }; +struct pipe_texture; struct setup_context; struct setup_context * diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index eeee7159d93..19d163df8e5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -28,23 +28,25 @@ #ifndef LP_SETUP_CONTEXT_H 
#define LP_SETUP_CONTEXT_H +#include "lp_setup.h" +#include "lp_rast.h" #define CMD_BLOCK_MAX 128 #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rast *, const union lp_rast_cmd_arg * ); +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg * ); struct cmd_block { - union lp_rast_arg *arg[CMD_BLOCK_MAX]; lp_rast_cmd cmd[CMD_BLOCK_MAX]; + const union lp_rast_cmd_arg *arg[CMD_BLOCK_MAX]; unsigned count; struct cmd_block *next; }; struct data_block { - ubyte data[DATA_BLOCK_SZ]; + ubyte data[DATA_BLOCK_SIZE]; unsigned used; struct data_block *next; }; @@ -68,10 +70,12 @@ struct data_block_list { struct setup_context { + struct lp_rasterizer *rast; + /* When there are multiple threads, will want to double-buffer the * bin arrays: */ - struct cmd_block_list bin[MAXHEIGHT / TILESIZE][MAXWIDTH / TILESIZE]; + struct cmd_block_list tile[MAXHEIGHT / TILESIZE][MAXWIDTH / TILESIZE]; struct data_block_list data; unsigned tiles_x; @@ -110,9 +114,12 @@ struct setup_context { void (*triangle)( struct setup_context *, const float (*v0)[4], const float (*v1)[4], - const float (*v1)[4]); + const float (*v2)[4]); }; +void lp_setup_new_data_block( struct data_block_list *list ); +void lp_setup_new_cmd_block( struct cmd_block_list *list ); + static INLINE void *get_data( struct data_block_list *list, unsigned size) { @@ -123,7 +130,7 @@ static INLINE void *get_data( struct data_block_list *list, { struct data_block *tail = list->tail; - char *data = tail->data + tail->used; + ubyte *data = tail->data + tail->used; tail->used += size; return data; } @@ -132,11 +139,11 @@ static INLINE void *get_data( struct data_block_list *list, /* Add a command to a given bin. 
*/ static INLINE void bin_cmd( struct cmd_block_list *list, - bin_cmd cmd, + lp_rast_cmd cmd, const union lp_rast_cmd_arg *arg ) { - if (list->tail.count == CMD_BLOCK_MAX) { - lp_setup_new_cmd_block( list ) + if (list->tail->count == CMD_BLOCK_MAX) { + lp_setup_new_cmd_block( list ); } { @@ -150,3 +157,4 @@ static INLINE void bin_cmd( struct cmd_block_list *list, +#endif -- cgit v1.2.3 From 37b86aa55c6bb520997c00dbf1a2b38d4aed38eb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 17:59:44 +0100 Subject: llvmpipe: Implement some of the rasterizer functions. --- src/gallium/drivers/llvmpipe/lp_rast.c | 88 +++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 498879e4cf9..2217debc02c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -37,7 +37,16 @@ struct lp_rasterizer *lp_rast_create( void ) { - return CALLOC_STRUCT(lp_rasterizer); + struct lp_rasterizer *rast; + + rast = CALLOC_STRUCT(lp_rasterizer); + if(!rast) + return NULL; + + rast->tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + + return rast; } void lp_rast_bind_surfaces( struct lp_rasterizer *rast, @@ -54,7 +63,7 @@ void lp_rast_bind_surfaces( struct lp_rasterizer *rast, /* Begining of each tile: */ -void lp_rast_start_tile( struct lp_rasterizer *, +void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, unsigned y ) { @@ -68,9 +77,17 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, const unsigned clear_color = arg->clear.clear_color; unsigned i, j; - for (i = 0; i < TILESIZE; i++) - for (j = 0; j < TILESIZE; j++) - rast->tile[i][j] = clear_color; + if (clear_color[0] == clear_color[1] && + clear_color[1] == clear_color[2] && + clear_color[2] == clear_color[3]) { + memset(rast->tile.color, clear_color[0], 
TILE_SIZE * TILE_SIZE * 4); + } + else { + for (y = 0; y < TILE_SIZE; y++) + for (x = 0; x < TILE_SIZE; x++) + for (chan = 0; chan < 4; ++chan) + TILE_PIXEL(rast->tile.color, x, y, chan) = clear_color[chan]; + } } void lp_rast_clear_zstencil( struct lp_rasterizer *rast, @@ -79,9 +96,9 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const unsigned clear_color = arg->clear.clear_zstencil; unsigned i, j; - for (i = 0; i < TILESIZE; i++) - for (j = 0; j < TILESIZE; j++) - rast->tile[i][j] = clear_depth; + for (i = 0; i < TILE_SIZE; i++) + for (j = 0; j < TILE_SIZE; j++) + rast->tile.depth[i][j] = clear_depth; } @@ -108,9 +125,11 @@ void lp_rast_set_state( struct lp_rasterizer *rast, void lp_rast_shade_tile( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg *arg, const struct lp_rast_shader_inputs *inputs ) { + unsigned i; + /* Set up the silly quad coef pointers */ for (i = 0; i < 4; i++) { @@ -120,8 +139,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, /* Use the existing preference for 8x2 (four quads) shading: */ - for (i = 0; i < TILESIZE; i += 8) { - for (j = 0; j < TILESIZE; j += 2) { + for (i = 0; i < TILE_SIZE; i += 8) { + for (j = 0; j < TILE_SIZE; j += 2) { rast->shader_state.shade( inputs->jc, rast->x + i, rast->y + j, @@ -189,13 +208,54 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* End of tile: */ + + void lp_rast_end_tile( struct lp_rasterizer *rast, boolean write_depth ) { - /* call u_tile func to store colors to surface */ + struct pipe_surface *surface; + struct pipe_screen *screen; + struct pipe_transfer *transfer; + const unsigned x = rast->x; + const unsigned y = rast->y; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + surface = rast->state.color; + if(!surface) + return; + + screen = surface->texture->screen; + + if(x + w > surface->width) + w = surface->width - x; + if(y + h > surface->height) + h = surface->height - x; + + transfer = 
screen->get_tex_transfer(screen, + surface->texture, + surface->face, + surface->level, + surface->zslice, + PIPE_TRANSFER_READ_WRITE, + x, y, w, h); + if(!transfer) + return; + + map = screen->transfer_map(screen, transfer); + if(map) { + lp_tile_write_4ub(transfer->format, + rast->tile.color, + map, transfer->stride, + x, y, w, h); + + screen->transfer_unmap(screen, transfer); + } + + screen->tex_transfer_destroy(screen, transfer); if (write_depth) { - /* call u_tile func to store depth/stencil to surface */ + /* FIXME: call u_tile func to store depth/stencil to surface */ } } @@ -203,6 +263,8 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, */ void lp_rast_destroy( struct lp_rasterizer *rast ) { + align_free(rast->tile.depth); + align_free(rast->tile.color); FREE(rast); } -- cgit v1.2.3 From 35a90e67ebc37dc0a8432db76c91b8855a94598a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 17:59:57 +0100 Subject: llvmpipe: Assorted build fixes. --- src/gallium/drivers/llvmpipe/lp_state_derived.c | 6 ++++-- src/gallium/drivers/llvmpipe/lp_state_surface.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index c9439c7154c..4015b0439a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -228,8 +228,10 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) } static void -update_culling() +update_culling(struct llvmpipe_context *lp) { + struct lp_setup_context *setup = lp->setup; + if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { @@ -283,7 +285,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) } -void llvmpipe_prepare( ) +void llvmpipe_prepare(struct lp_setup_context *setup) { struct llvmpipe_context *lp = setup->llvmpipe; 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 06560335d70..bb1396c3ab8 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -82,7 +82,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } if (dirty) { - lp_setup_set_framebuffer( llvmpipe->setup, fb ); + lp_setup_set_framebuffer( lp->setup, fb ); lp->dirty |= LP_NEW_FRAMEBUFFER; } } -- cgit v1.2.3 From ab76b2a8b896edc1e972de108d044b70310b4324 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 19:03:14 +0100 Subject: llvmpipe: Complete more rasterizer methods.. --- src/gallium/drivers/llvmpipe/lp_rast.c | 68 +++++++++++------------------ src/gallium/drivers/llvmpipe/lp_rast.h | 17 +++++--- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 14 ++++-- 3 files changed, 48 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2217debc02c..50d2a0a0f37 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -50,14 +50,14 @@ struct lp_rasterizer *lp_rast_create( void ) } void lp_rast_bind_surfaces( struct lp_rasterizer *rast, - struct pipe_surface *color, - struct pipe_surface *zstencil, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf, const float *clear_color, double clear_depth, unsigned clear_stencil) { - pipe_surface_reference(&rast->state.color, color); - pipe_surface_reference(&rast->state.depth, depth); + pipe_surface_reference(&rast->state.cbuf, cbuf); + pipe_surface_reference(&rast->state.zsbuf, zsbuf); } @@ -93,12 +93,12 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg) { - const unsigned clear_color = arg->clear.clear_zstencil; + const unsigned clear_zstencil = arg->clear.clear_zstencil; unsigned i, j; for (i = 0; i 
< TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) - rast->tile.depth[i][j] = clear_depth; + rast->tile.depth[i*TILE_SIZE + j] = clear_zstencil; } @@ -119,7 +119,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_set_state( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg ) { - rast->shader_state = arg->state; + rast->shader_state = arg->set_state; } @@ -128,36 +128,24 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg, const struct lp_rast_shader_inputs *inputs ) { - unsigned i; - - /* Set up the silly quad coef pointers - */ - for (i = 0; i < 4; i++) { - rast->quads[i].posCoef = &inputs->posCoef; - rast->quads[i].coef = inputs->coef; - } + const uint32_t masks[4] = {~0, ~0, ~0, ~0}; + unsigned i, j; /* Use the existing preference for 8x2 (four quads) shading: */ - for (i = 0; i < TILE_SIZE; i += 8) { - for (j = 0; j < TILE_SIZE; j += 2) { - rast->shader_state.shade( inputs->jc, - rast->x + i, - rast->y + j, - rast->quads, 4 ); - } - } + for (i = 0; i < TILE_SIZE; i += 8) + for (j = 0; j < TILE_SIZE; j += 2) + lp_rast_shade_quads( rast, inputs, i, j, &masks); } -void lp_rast_shade_quads( const struct lp_rast_state *state, - struct lp_rast_tile *tile, - struct quad_header **quads, - unsigned nr ) +void lp_rast_shade_quads( struct lp_rasterizer *rast, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + const unsigned *masks) { - struct quad_header *quad = quads[0]; - const unsigned x = quad->input.x0; - const unsigned y = quad->input.y0; + const struct lp_rast_state *state = rast->shader_state; + struct lp_rast_tile *tile = &rast->tile; uint8_t *color; uint8_t *depth; uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; @@ -165,18 +153,13 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, unsigned q; /* Sanity checks */ - assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH); assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); - for (q 
= 0; q < nr; ++q) { - assert(quads[q]->input.x0 == x + q*2); - assert(quads[q]->input.y0 == y); - } /* mask */ for (q = 0; q < 4; ++q) for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0; + mask[q][chan_index] = masks[q] & (1 << chan_index) ? ~0 : 0; /* color buffer */ color = &TILE_PIXEL(tile->color, x, y, 0); @@ -184,7 +167,7 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* depth buffer */ assert((x % 2) == 0); assert((y % 2) == 0); - depth = (uint8_t *)tile->depth + y*TILE_SIZE*4 + 2*x*4; + depth = tile->depth + y*TILE_SIZE + 2*x; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(mask, 16)); @@ -196,9 +179,9 @@ void lp_rast_shade_quads( const struct lp_rast_state *state, /* run shader */ state->shader( &state->jc, x, y, - quad->coef->a0, - quad->coef->dadx, - quad->coef->dady, + inputs->a0, + inputs->dadx, + inputs->dady, &mask[0][0], color, depth); @@ -220,8 +203,9 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, const unsigned y = rast->y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; + void *map; - surface = rast->state.color; + surface = rast->state.cbuf; if(!surface) return; @@ -252,7 +236,7 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, screen->transfer_unmap(screen, transfer); } - screen->tex_transfer_destroy(screen, transfer); + screen->tex_transfer_destroy(transfer); if (write_depth) { /* FIXME: call u_tile func to store depth/stencil to surface */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index f40208bbda9..380a1adbd29 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -28,6 +28,7 @@ #ifndef LP_RAST_H #define LP_RAST_H +#include "pipe/p_compiler.h" #include "lp_jit.h" /* Initially create and program a single rasterizer directly. 
Later @@ -91,9 +92,6 @@ struct lp_rast_triangle { float dx12; float dx23; float dx31; - - /* State to run the shader: */ - struct lp_rast_shader_inputs inputs; }; struct clear_tile { @@ -112,8 +110,8 @@ struct load_tile { struct lp_rasterizer *lp_rast_create( void ); void lp_rast_bind_surfaces( struct lp_rasterizer *, - struct pipe_surface *color, - struct pipe_surface *zstencil, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf, const float *clear_color, double clear_depth, unsigned clear_stencil); @@ -154,7 +152,8 @@ void lp_rast_triangle( struct lp_rasterizer *, const union lp_rast_cmd_arg * ); void lp_rast_shade_tile( struct lp_rasterizer *, - const union lp_rast_cmd_arg * ); + const union lp_rast_cmd_arg *, + const struct lp_rast_shader_inputs *); void lp_rast_store_color( struct lp_rasterizer *, const union lp_rast_cmd_arg *); @@ -163,6 +162,12 @@ void lp_rast_store_zstencil( struct lp_rasterizer *, const union lp_rast_cmd_arg *); +/* End of tile: + */ + +void lp_rast_end_tile( struct lp_rasterizer *rast, + boolean write_depth ); + /* Shutdown: */ void lp_rast_destroy( struct lp_rasterizer * ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index b819519553f..29e4c8fd800 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -38,7 +38,7 @@ struct lp_rast_tile { uint8_t *color; - uint8_t *depth; + uint32_t *depth; }; @@ -55,12 +55,20 @@ struct lp_rasterizer { struct { - struct pipe_surface *color; - struct pipe_surface *zstencil; + struct pipe_surface *cbuf; + struct pipe_surface *zsbuf; unsigned clear_color; unsigned clear_depth; char clear_stencil; } state; + + const struct lp_rast_state *shader_state; }; + +void lp_rast_shade_quads( struct lp_rasterizer *rast, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + const unsigned *masks); + #endif -- cgit v1.2.3 From f92787679d668bd1f48929da49d4df55be635fa9 Mon Sep 17 
00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 19:03:35 +0100 Subject: llvmpipe: More assorted build fixes. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_setup.c | 1 - src/gallium/drivers/llvmpipe/lp_state_derived.c | 30 ++++--------------------- 3 files changed, 6 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 40965d5f659..63e956fb20a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -203,8 +203,8 @@ void lp_rast_triangle( struct lp_rasterizer *rast, */ minx = MAX2(tri->maxx, rast->x); miny = MAX2(tri->miny, rast->y); - maxx = MIN2(tri->maxx, rast->x + TILESIZE); - maxy = MIN2(tri->maxy, rast->y + TILESIZE); + maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); + maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); if (miny == maxy || minx == maxx) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 43a4f5f0297..9016c4b3645 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -250,7 +250,6 @@ lp_setup_clear( struct setup_context *setup, { if (setup->state == SETUP_ACTIVE) { struct lp_rast_clear_info *clear_info; - unsigned i, j; clear_info = alloc_clear_info( setup ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 4015b0439a6..b801f054a2f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -205,28 +205,7 @@ compute_cliprect(struct llvmpipe_context *lp) } -static void -update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) -{ - unsigned i; - - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.vert_samplers[i].texture = 
llvmpipe->texture[i]; - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.frag_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - } - - llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; -} - +#if 0 static void update_culling(struct llvmpipe_context *lp) { @@ -243,6 +222,7 @@ update_culling(struct llvmpipe_context *lp) setup->winding = PIPE_WINDING_NONE; } } +#endif /* Hopefully this will remain quite simple, otherwise need to pull in @@ -259,10 +239,6 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) llvmpipe->dirty |= LP_NEW_TEXTURE; } - if (llvmpipe->dirty & (LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) - update_tgsi_samplers( llvmpipe ); - if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | LP_NEW_VS)) @@ -285,6 +261,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) } +#if 0 void llvmpipe_prepare(struct lp_setup_context *setup) { struct llvmpipe_context *lp = setup->llvmpipe; @@ -294,3 +271,4 @@ void llvmpipe_prepare(struct lp_setup_context *setup) } } +#endif -- cgit v1.2.3 From 86dba3e4142276d76ecffc0cd238506df5efe9af Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 19:16:47 +0100 Subject: llvmpipe: Final adjustments to rasterizer methods. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 22 ++++++++++++---------- src/gallium/drivers/llvmpipe/lp_rast.h | 2 ++ 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 50d2a0a0f37..9d1861d2464 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -74,8 +74,7 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, void lp_rast_clear_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg ) { - const unsigned clear_color = arg->clear.clear_color; - unsigned i, j; + const uint8_t *clear_color = arg->clear_color; if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && @@ -83,6 +82,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, memset(rast->tile.color, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } else { + unsigned x, y, chan; for (y = 0; y < TILE_SIZE; y++) for (x = 0; x < TILE_SIZE; x++) for (chan = 0; chan < 4; ++chan) @@ -93,7 +93,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg) { - const unsigned clear_zstencil = arg->clear.clear_zstencil; + const unsigned clear_zstencil = arg->clear_zstencil; unsigned i, j; for (i = 0; i < TILE_SIZE; i++) @@ -128,14 +128,14 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg *arg, const struct lp_rast_shader_inputs *inputs ) { - const uint32_t masks[4] = {~0, ~0, ~0, ~0}; - unsigned i, j; + const unsigned masks[4] = {~0, ~0, ~0, ~0}; + unsigned x, y; /* Use the existing preference for 8x2 (four quads) shading: */ - for (i = 0; i < TILE_SIZE; i += 8) - for (j = 0; j < TILE_SIZE; j += 2) - lp_rast_shade_quads( rast, inputs, i, j, &masks); + for (y = 0; y < TILE_SIZE; y += 2) + for (x = 0; x < TILE_SIZE; x += 8) + lp_rast_shade_quads( rast, inputs, x, y, masks); } @@ -146,8 +146,8 @@ void 
lp_rast_shade_quads( struct lp_rasterizer *rast, { const struct lp_rast_state *state = rast->shader_state; struct lp_rast_tile *tile = &rast->tile; - uint8_t *color; - uint8_t *depth; + void *color; + void *depth; uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; @@ -247,6 +247,8 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, */ void lp_rast_destroy( struct lp_rasterizer *rast ) { + pipe_surface_reference(&rast->state.cbuf, NULL); + pipe_surface_reference(&rast->state.zsbuf, NULL); align_free(rast->tile.depth); align_free(rast->tile.color); FREE(rast); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 380a1adbd29..0aa111b4723 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -128,6 +128,8 @@ union lp_rast_cmd_arg { const struct lp_rast_shader_inputs *shade_tile; const struct lp_rast_triangle *triangle; const struct lp_rast_state *set_state; + const uint8_t clear_color[4]; + unsigned clear_zstencil; }; -- cgit v1.2.3 From c7227f4b8d76d70b4f7ab8d384befd823c2be03e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 19:14:46 +0100 Subject: llvmpipe: rast_tri updates --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 63e956fb20a..896ac253a02 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -29,18 +29,10 @@ * Rasterization for binned triangles within a tile */ -#include "lp_context.h" #include "lp_quad.h" #include "lp_quad_pipe.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" -#include "util/u_math.h" -#include 
"util/u_memory.h" +#include "lp_rast_priv.h" + #define BLOCKSIZE 4 @@ -48,7 +40,7 @@ /* Convert 8x8 block into four runs of quads and render each in turn. */ #if (BLOCKSIZE == 8) -static void block_full( struct triangle *tri, int x, int y ) +static void block_full( struct lp_rast_triangle *tri, int x, int y ) { struct quad_header *ptrs[4]; int i; @@ -79,7 +71,7 @@ static void block_full( struct triangle *tri, int x, int y ) } } #else -static void block_full( struct triangle *tri, int x, int y ) +static void block_full( struct lp_rast_triangle *tri, int x, int y ) { struct quad_header *ptrs[4]; int iy; @@ -108,7 +100,7 @@ do_quad( struct lp_rasterizer *rast, int x, int y, float c1, float c2, float c3 ) { - struct triangle *tri = rast->tri; + struct lp_rast_triangle *tri = rast->tri; struct quad_header *quad = &rast->quad[0]; float xstep1 = -tri->dy12; @@ -151,7 +143,7 @@ do_quad( struct lp_rasterizer *rast, * the quad: */ static void -do_block( struct triangle *tri, +do_block( struct lp_rast_triangle *tri, int x, int y, float c1, float c2, -- cgit v1.2.3 From 07ee87e6645318a34a395a50f4e8d554d118d24f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 19:18:35 +0100 Subject: llvmpipe: Fix typo. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 9d1861d2464..170684c1b23 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -214,7 +214,7 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, if(x + w > surface->width) w = surface->width - x; if(y + h > surface->height) - h = surface->height - x; + h = surface->height - y; transfer = screen->get_tex_transfer(screen, surface->texture, -- cgit v1.2.3 From 7ef36171d5d58b97ffa179f824d77a9c339a7ae4 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 19:53:14 +0100 Subject: llvmpipe: Call lp_rast_shade_quads from tri rasterizer. --- src/gallium/drivers/llvmpipe/lp_rast.h | 3 + src/gallium/drivers/llvmpipe/lp_rast_tri.c | 276 +++++++++-------------------- 2 files changed, 90 insertions(+), 189 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 0aa111b4723..41a7f5ebbf3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -92,6 +92,9 @@ struct lp_rast_triangle { float dx12; float dx23; float dx31; + + /* inputs for the shader */ + struct lp_rast_shader_inputs *inputs; }; struct clear_tile { diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 896ac253a02..138d6f55e00 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -29,80 +29,46 @@ * Rasterization for binned triangles within a tile */ +#include "util/u_math.h" #include "lp_quad.h" -#include "lp_quad_pipe.h" #include "lp_rast_priv.h" +#include "lp_tile_soa.h" -#define BLOCKSIZE 4 +#define BLOCKSIZE 8 /* Convert 8x8 block into four runs of quads and render each in turn. 
*/ #if (BLOCKSIZE == 8) -static void block_full( struct lp_rast_triangle *tri, int x, int y ) +static void block_full( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y ) { - struct quad_header *ptrs[4]; - int i; - - tri->quad[0].input.x0 = x + 0; - tri->quad[1].input.x0 = x + 2; - tri->quad[2].input.x0 = x + 4; - tri->quad[3].input.x0 = x + 6; - - for (i = 0; i < 4; i++, y += 2) { - tri->quad[0].inout.mask = 0xf; - tri->quad[1].inout.mask = 0xf; - tri->quad[2].inout.mask = 0xf; - tri->quad[3].inout.mask = 0xf; - - tri->quad[0].input.y0 = y; - tri->quad[1].input.y0 = y; - tri->quad[2].input.y0 = y; - tri->quad[3].input.y0 = y; - - /* XXX: don't bother with this ptrs business */ - ptrs[0] = &tri->quad[0]; - ptrs[1] = &tri->quad[1]; - ptrs[2] = &tri->quad[2]; - ptrs[3] = &tri->quad[3]; - - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); - } + const unsigned masks[4] = {~0, ~0, ~0, ~0}; + int iy; + + for (iy = 0; iy < 8; iy += 2) + lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); } #else -static void block_full( struct lp_rast_triangle *tri, int x, int y ) +static void block_full( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y ) { - struct quad_header *ptrs[4]; + const unsigned masks[4] = {~0, ~0, 0, 0}; /* FIXME: Wasting quads!!! 
*/ int iy; - tri->quad[0].input.x0 = x + 0; - tri->quad[1].input.x0 = x + 2; - - for (iy = 0; iy < 4; iy += 2) { - tri->quad[0].inout.mask = 0xf; - tri->quad[1].inout.mask = 0xf; - - tri->quad[0].input.y0 = y + iy; - tri->quad[1].input.y0 = y + iy; - - /* XXX: don't bother with this ptrs business */ - ptrs[0] = &tri->quad[0]; - ptrs[1] = &tri->quad[1]; - - tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); - } + for (iy = 0; iy < 4; iy += 2) + lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); } #endif -static void -do_quad( struct lp_rasterizer *rast, +static INLINE unsigned +do_quad( const struct lp_rast_triangle *tri, int x, int y, float c1, float c2, float c3 ) { - struct lp_rast_triangle *tri = rast->tri; - struct quad_header *quad = &rast->quad[0]; - float xstep1 = -tri->dy12; float xstep2 = -tri->dy23; float xstep3 = -tri->dy31; @@ -111,43 +77,41 @@ do_quad( struct lp_rasterizer *rast, float ystep2 = tri->dx23; float ystep3 = tri->dx31; - quad->input.x0 = x; - quad->input.y0 = y; - quad->inout.mask = 0; + unsigned mask = 0; if (c1 > 0 && c2 > 0 && c3 > 0) - quad->inout.mask |= 1; + mask |= 1; if (c1 + xstep1 > 0 && c2 + xstep2 > 0 && c3 + xstep3 > 0) - quad->inout.mask |= 2; + mask |= 2; if (c1 + ystep1 > 0 && c2 + ystep2 > 0 && c3 + ystep3 > 0) - quad->inout.mask |= 4; + mask |= 4; if (c1 + ystep1 + xstep1 > 0 && c2 + ystep2 + xstep2 > 0 && c3 + ystep3 + xstep3 > 0) - quad->inout.mask |= 8; + mask |= 8; - if (quad->inout.mask) - rast->state->run( rast->state->state, &quad, 1 ); + return mask; } /* Evaluate each pixel in a block, generate a mask and possibly render * the quad: */ static void -do_block( struct lp_rast_triangle *tri, - int x, int y, - float c1, - float c2, - float c3 ) +do_block( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y, + float c1, + float c2, + float c3 ) { const int step = 2; @@ -166,19 +130,24 @@ do_block( struct lp_rast_triangle *tri, float cx2 = c2; float cx3 = c3; + 
unsigned masks[4] = {0, 0, 0, 0}; + for (ix = 0; ix < BLOCKSIZE; ix += 2) { - do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); + masks[ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3); cx1 += xstep1; cx2 += xstep2; cx3 += xstep3; } + lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); + c1 += ystep1; c2 += ystep2; c3 += ystep3; } + } @@ -187,8 +156,9 @@ do_block( struct lp_rast_triangle *tri, * for this triangle: */ void lp_rast_triangle( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri ) + const union lp_rast_cmd_arg *arg ) { + const struct lp_rast_triangle *tri = arg->triangle; int minx, maxx, miny, maxy; /* Clamp to tile dimensions: @@ -205,136 +175,64 @@ void lp_rast_triangle( struct lp_rasterizer *rast, return; } - /* Bind parameter interpolants: - */ - for (i = 0; i < Elements(rast->quad); i++) { - rast->quad[i].coef = tri->coef; - rast->quad[i].posCoef = &tri->position_coef; - } - - /* Small area? - */ - if (miny + 16 > maxy && - minx + 16 > maxx) - { - const int step = 2; + const int step = BLOCKSIZE; - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; + float ei1 = tri->ei1 * step; + float ei2 = tri->ei2 * step; + float ei3 = tri->ei3 * step; - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; + float eo1 = tri->eo1 * step; + float eo2 = tri->eo2 * step; + float eo3 = tri->eo3 * step; - float eo1 = tri->eo1 * step; - float eo2 = tri->eo2 * step; - float eo3 = tri->eo3 * step; + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; - int x, y; + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + int x, y; - minx &= ~(step-1); - maxx &= ~(step-1); + minx &= ~(step-1); + miny &= ~(step-1); - /* Subdivide space into NxM blocks, where each block is square and - * power-of-four in dimension. 
- * - * Trivially accept or reject blocks, else jump to per-pixel - * examination above. - */ - for (y = miny; y < maxy; y += step) - { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; - - for (x = minx; x < maxx; x += step) - { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - } - else - { - do_quad(&tri, x, y, cx1, cx2, cx3); - } - - /* Iterate cx values across the region: - */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - /* Iterate c values down the region: - */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } - } - else + for (y = miny; y < maxy; y += step) { - const int step = BLOCKSIZE; - - float ei1 = tri->ei1 * step; - float ei2 = tri->ei2 * step; - float ei3 = tri->ei3 * step; - - float eo1 = tri->eo1 * step; - float eo2 = tri->eo2 * step; - float eo3 = tri->eo3 * step; - - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; - - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; - int x, y; - - minx &= ~(step-1); - miny &= ~(step-1); + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; - for (y = miny; y < maxy; y += step) + for (x = minx; x < maxx; x += step) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; - - for (x = minx; x < maxx; x += step) - { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { - block_full(&tri, x, y); /* trivial accept */ - } - else - { - do_block(&tri, x, y, cx1, cx2, cx3); - } - - /* Iterate cx values across the region: - */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - /* Iterate c values down the region: - */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + block_full(rast, tri, x, y); /* trivial accept */ + } + else + { + do_block(rast, tri, x, y, 
cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; } } -- cgit v1.2.3 From df8cedf9898cfc9c2ff8d5249ab1fe316f575a84 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 19:53:56 +0100 Subject: llvmpipe: Adjust interpolation coeffs declaration. --- src/gallium/drivers/llvmpipe/lp_rast.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 41a7f5ebbf3..492e4b06ada 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -61,11 +61,15 @@ struct lp_rast_shader_inputs { */ const struct lp_rast_state *state; - /* Attribute interpolation: FIXME: reduce memory waste! + /* Attribute interpolation: + * + * First coefficient is position. + * + * FIXME: reduce memory waste! */ - float a0[PIPE_MAX_ATTRIBS][4]; - float dadx[PIPE_MAX_ATTRIBS][4]; - float dady[PIPE_MAX_ATTRIBS][4]; + float a0[1 + PIPE_MAX_SHADER_INPUTS][4]; + float dadx[1 + PIPE_MAX_SHADER_INPUTS][4]; + float dady[1 + PIPE_MAX_SHADER_INPUTS][4]; }; -- cgit v1.2.3 From 21489d2275ff556f6e44008d3f5493ca64619696 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Oct 2009 19:56:01 +0100 Subject: llvmpipe: Remove quad headers. 
--- src/gallium/drivers/llvmpipe/lp_quad.h | 114 ----------------------------- src/gallium/drivers/llvmpipe/lp_rast.c | 1 - src/gallium/drivers/llvmpipe/lp_rast_tri.c | 1 - src/gallium/drivers/llvmpipe/lp_state_fs.c | 1 - 4 files changed, 117 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_quad.h (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h deleted file mode 100644 index 7eb05de77a1..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ /dev/null @@ -1,114 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#ifndef LP_QUAD_H -#define LP_QUAD_H - -#include "pipe/p_state.h" -#include "tgsi/tgsi_exec.h" - - -#define QUAD_PRIM_POINT 1 -#define QUAD_PRIM_LINE 2 -#define QUAD_PRIM_TRI 3 - - -/* The rasterizer generates 2x2 quads of fragment and feeds them to - * the current fp_machine (see below). - * Remember that Y=0=top with Y increasing down the window. - */ -#define QUAD_TOP_LEFT 0 -#define QUAD_TOP_RIGHT 1 -#define QUAD_BOTTOM_LEFT 2 -#define QUAD_BOTTOM_RIGHT 3 - -#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) -#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) -#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) -#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) -#define MASK_ALL 0xf - - -/** - * Quad stage inputs (pos, coverage, front/back face, etc) - */ -struct quad_header_input -{ - int x0, y0; /**< quad window pos, always even */ - float coverage[QUAD_SIZE]; /**< fragment coverage for antialiasing */ - unsigned facing:1; /**< Front (0) or back (1) facing? */ - unsigned prim:2; /**< QUAD_PRIM_POINT, LINE, TRI */ -}; - - -/** - * Quad stage inputs/outputs. - */ -struct quad_header_inout -{ - unsigned mask:4; -}; - - -/** - * Quad stage outputs (color & depth). - */ -struct quad_header_output -{ - /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; -}; - - -/** - * Input interpolation coefficients - */ -struct quad_interp_coef -{ - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; -}; - - -/** - * Encodes everything we need to know about a 2x2 pixel block. Uses - * "Channel-Serial" or "SoA" layout. 
- */ -struct quad_header { - struct quad_header_input input; - struct quad_header_inout inout; - - /* Redundant/duplicated: - */ - const struct quad_interp_coef *coef; -}; - -#endif /* LP_QUAD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 170684c1b23..110caafffb6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -28,7 +28,6 @@ #include "util/u_memory.h" #include "lp_state.h" -#include "lp_quad.h" #include "lp_rast.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 138d6f55e00..86c785babb8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -30,7 +30,6 @@ */ #include "util/u_math.h" -#include "lp_quad.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index b00be0cc32a..2c8b383123c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -84,7 +84,6 @@ #include "lp_screen.h" #include "lp_context.h" #include "lp_state.h" -#include "lp_quad.h" #include "lp_tex_sample.h" -- cgit v1.2.3 From 0718c7700533a965d7cd06b4f67b82bbae6e66a1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 19:58:28 +0100 Subject: llvmpipe: work on clears and coefficients --- src/gallium/drivers/llvmpipe/lp_rast.c | 15 +- src/gallium/drivers/llvmpipe/lp_rast.h | 27 +--- src/gallium/drivers/llvmpipe/lp_setup.c | 51 +++++-- src/gallium/drivers/llvmpipe/lp_setup.h | 15 ++ src/gallium/drivers/llvmpipe/lp_setup_context.h | 5 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 186 ++++++++++++------------ 6 files changed, 166 insertions(+), 133 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c 
index 110caafffb6..695ddc089a4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -48,14 +48,17 @@ struct lp_rasterizer *lp_rast_create( void ) return rast; } -void lp_rast_bind_surfaces( struct lp_rasterizer *rast, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - const float *clear_color, - double clear_depth, - unsigned clear_stencil) +void lp_rast_bind_color( struct lp_rasterizer *rast, + struct pipe_surface *cbuf, + boolean write_color ) { pipe_surface_reference(&rast->state.cbuf, cbuf); +} + +void lp_rast_bind_zstencil( struct lp_rasterizer *rast, + struct pipe_surface *zsbuf, + boolean write_zstencil ) +{ pipe_surface_reference(&rast->state.zsbuf, zsbuf); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 492e4b06ada..28bb0a60ebc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -101,27 +101,17 @@ struct lp_rast_triangle { struct lp_rast_shader_inputs *inputs; }; -struct clear_tile { - boolean do_color; - boolean do_depth_stencil; - unsigned rgba; - unsigned depth_stencil; -}; - -struct load_tile { - boolean do_color; - boolean do_depth_stencil; -}; struct lp_rasterizer *lp_rast_create( void ); -void lp_rast_bind_surfaces( struct lp_rasterizer *, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - const float *clear_color, - double clear_depth, - unsigned clear_stencil); +void lp_rast_bind_color( struct lp_rasterizer *, + struct pipe_surface *cbuf, + boolean write_when_done ); + +void lp_rast_bind_depth( struct lp_rasterizer *, + struct pipe_surface *zsbuf, + boolean write_when_done ); /* Begining of each tile: */ @@ -174,8 +164,7 @@ void lp_rast_store_zstencil( struct lp_rasterizer *, /* End of tile: */ -void lp_rast_end_tile( struct lp_rasterizer *rast, - boolean write_depth ); +void lp_rast_end_tile( struct lp_rasterizer *rast ); /* Shutdown: */ diff --git 
a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 9016c4b3645..57ac85468d8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -58,6 +58,8 @@ static void reset_context( struct setup_context *setup ) { unsigned i, j; + /* Free binner command lists: + */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { struct cmd_block_list *list = &setup->tile[i][j]; @@ -73,6 +75,8 @@ static void reset_context( struct setup_context *setup ) } } + /* Free binned data: + */ { struct data_block_list *list = &setup->data; struct data_block *block, *tmp; @@ -84,6 +88,10 @@ static void reset_context( struct setup_context *setup ) list->head = list->tail; } + + /* Reset some state: + */ + setup->clear.flags = 0; } @@ -131,7 +139,7 @@ rasterize_bins( struct setup_context *setup, } } - lp_rast_finish_tile( rast ); + lp_rast_end_tile( rast ); } } @@ -144,10 +152,10 @@ static void begin_binning( struct setup_context *setup ) { if (setup->fb.color) { - if (setup->fb.clear_color) + if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, - &setup->clear_data ); + &setup->clear.color ); else bin_everywhere( setup, lp_rast_load_color, @@ -155,10 +163,10 @@ begin_binning( struct setup_context *setup ) } if (setup->fb.zstencil) { - if (setup->fb.clear_zstencil) + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) bin_everywhere( setup, lp_rast_clear_zstencil, - &setup->clear_data ); + &setup->clear.zstencil ); else bin_everywhere( setup, lp_rast_load_zstencil, @@ -176,7 +184,7 @@ static void execute_clears( struct setup_context *setup ) { begin_binning( setup ); - rasterize_bins( setup ); + rasterize_bins( setup, TRUE ); } @@ -192,7 +200,7 @@ set_state( struct setup_context *setup, switch (new_state) { case SETUP_ACTIVE: if (old_state == SETUP_FLUSHED) - setup_begin_binning( setup ); + begin_binning( setup ); break; case SETUP_CLEARED: @@ -203,10 +211,10 
@@ set_state( struct setup_context *setup, break; case SETUP_FLUSHED: - if (old_state == SETUP_CLEAR) + if (old_state == SETUP_CLEARED) execute_clears( setup ); else - rasterize_bins( setup ); + rasterize_bins( setup, TRUE ); break; } @@ -271,15 +279,20 @@ lp_setup_clear( struct setup_context *setup, } else { set_state( setup, SETUP_CLEARED ); + setup->clear.flags |= flags; if (flags & PIPE_CLEAR_COLOR) { - memcpy(setup->clear.color, color, sizeof setup->clear.color); + util_pack_color(rgba, + setup->fb.cbuf->format, + &setup->clear.color.clear_color ); } if (flags & PIPE_CLEAR_DEPTH_STENCIL) { - setup->clear.depth = clear_depth; - setup->clear.stencil = clear_stencil; + setup->clear.zstencil.clear_zstencil = + util_pack_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); } } } @@ -293,6 +306,12 @@ lp_setup_set_fs_inputs( struct setup_context *setup, memcpy( setup->interp, interp, nr * sizeof interp[0] ); } +void +lp_setup_set_shader_state( struct setup_context *setup, + const struct jit_context *jc ) +{ +} + static void first_triangle( struct setup_context *setup, @@ -324,10 +343,10 @@ lp_setup_line(struct setup_context *setup, } void -lp_setup_triangle(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]) +lp_setup_tri(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { setup->triangle( setup, v0, v1, v2 ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 6f560f5f931..7c813070b95 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -65,6 +65,17 @@ void lp_setup_point( struct setup_context *setup, const float (*v0)[4] ); + +void +lp_setup_flush( struct setup_context *setup, + unsigned flags ); + + +void +lp_setup_bind_framebuffer( struct setup_context *setup, + struct pipe_surface *color, + struct pipe_surface *zstencil ); + void lp_setup_set_triangle_state( 
struct setup_context *setup, unsigned cullmode, @@ -75,6 +86,10 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const enum lp_interp *interp, unsigned nr ); +void +lp_setup_set_shader_state( struct setup_context *setup, + const struct jit_context *jc ); + boolean lp_setup_is_texture_referenced( struct setup_context *setup, const struct pipe_texture *texture ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 19d163df8e5..5722e3e9de8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -88,9 +88,8 @@ struct setup_context { struct { unsigned flags; - float clear_color[4]; - double clear_depth; - unsigned clear_stencil; + union lp_rast_cmd_arg color; + union lp_rast_cmd_arg zstencil; } clear; enum { diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 75a0ea88881..efd91124a09 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -38,55 +38,60 @@ /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct tgsi_interp_coef *coef, +static void constant_coef( struct lp_rast_triangle *tri, const float (*v3)[4], unsigned vert_attr, unsigned i ) { - coef->a0[i] = v3[vert_attr][i]; - coef->dadx[i] = 0; - coef->dady[i] = 0; + tri->inputs.a0[i] = v3[vert_attr][i]; + tri->inputs.dadx[i] = 0; + tri->inputs.dady[i] = 0; } /** * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. 
*/ -static void linear_coef( struct triangle *tri, - struct tgsi_interp_coef *coef, +static void linear_coef( struct lp_rast_triangle *tri, + unsigned input, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], - unsigned vert_attr, - unsigned i) + unsigned vert_attr) { - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; - float a3 = v3[vert_attr][i]; - - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - coef->a0[i] = (v1[vert_attr][i] - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + unsigned i; + + input *= 4; + + for (i = 0; i < NUM_CHANNELS; i++) { + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + tri->inputs.dadx[input+i] = dadx; + tri->inputs.dady[input+i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). 
+ * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + tri->inputs.a0[input+i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); + } } @@ -98,30 +103,35 @@ static void linear_coef( struct triangle *tri, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void perspective_coef( struct triangle *tri, - struct tgsi_interp_coef *coef, +static void perspective_coef( struct lp_rast_triangle *tri, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], unsigned vert_attr, unsigned i) { - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; - float a3 = v3[vert_attr][i] * v3[0][3]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + unsigned i; + + input *= 4; + + for (i = 0; i < NUM_CHANNELS; i++) { + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + + 
tri->inputs.dadx[input+i] = dadx; + tri->inputs.dady[input+i] = dady; + tri->inputs.a0[input+i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); + } } @@ -132,24 +142,26 @@ static void perspective_coef( struct triangle *tri, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coef(struct triangle *tri, unsigned slot) +setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot) { + slot *= 4; + /*X*/ - tri->coef[slot].a0[0] = 0.0; - tri->coef[slot].dadx[0] = 1.0; - tri->coef[slot].dady[0] = 0.0; + tri->inputs.a0[slot+0] = 0.0; + tri->inputs.dadx[slot+0] = 1.0; + tri->inputs.dady[slot+0] = 0.0; /*Y*/ - tri->coef[slot].a0[1] = 0.0; - tri->coef[slot].dadx[1] = 0.0; - tri->coef[slot].dady[1] = 1.0; + tri->inputs.a0[slot+1] = 0.0; + tri->inputs.dadx[slot+1] = 0.0; + tri->inputs.dady[slot+1] = 1.0; /*Z*/ - tri->coef[slot].a0[2] = tri->position_coef.a0[2]; - tri->coef[slot].dadx[2] = tri->position_coef.dadx[2]; - tri->coef[slot].dady[2] = tri->position_coef.dady[2]; + tri->inputs.a0[slot+2] = tri->inputs.a0[2]; + tri->inputs.dadx[slot+2] = tri->inputs.dadx[2]; + tri->inputs.dady[slot+2] = tri->inputs.dady[2]; /*W*/ - tri->coef[slot].a0[3] = tri->position_coef.a0[3]; - tri->coef[slot].dadx[3] = tri->position_coef.dadx[3]; - tri->coef[slot].dady[3] = tri->position_coef.dady[3]; + tri->inputs.a0[slot+3] = tri->inputs.a0[3]; + tri->inputs.dadx[slot+3] = tri->inputs.dadx[3]; + tri->inputs.dady[slot+3] = tri->inputs.dady[3]; } @@ -158,50 +170,46 @@ setup_fragcoord_coef(struct triangle *tri, unsigned slot) * Compute the tri->coef[] array dadx, dady, a0 values. 
*/ static void setup_tri_coefficients( struct setup_context *setup, - struct triangle *tri, + struct lp_rast_triangle *tri, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontface ) { - const struct vertex_info *vinfo = setup->vinfo; unsigned input; /* z and w are done by linear interpolation: */ - linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 2); - linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 3); + setup_fragcoord_coef(tri, 0); + linear_coef(tri, input, v1, v2, v3, vert_attr, i); - /* setup interpolation for all the remaining attributes: + /* setup interpolation for all the remaining attrbutes: */ - for (input = 0; input < vinfo->num_fs_inputs; input++) { - unsigned vert_attr = vinfo->attrib[input].src_index; + for (input = 0; input < setup->fs.nr_inputs; input++) { + unsigned vert_attr = setup->fs.input[input].src_index; unsigned i; - switch (vinfo->attrib[input].interp_mode) { - case INTERP_CONSTANT: - for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(tri->coef[input], v3, vert_attr, i); + switch (setup->fs.input[input].interp_mode) { + case LP_INTERP_CONSTANT: + constant_coef(tri, input, v3, vert_attr, i); break; - case INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); + case LP_INTERP_LINEAR: + linear_coef(tri, input, v1, v2, v3, vert_attr, i); break; - case INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); + case LP_INTERP_PERSPECTIVE: + perspective_coef(tri, input, v1, v2, v3, vert_attr, i); break; - case INTERP_POS: + case LP_INTERP_POS: setup_fragcoord_coef(tri, input); break; - case INTERP_FACING: - tri->coef[input].a0[0] = 1.0f - frontface; - tri->coef[input].dadx[0] = 0.0; - tri->coef[input].dady[0] = 0.0; + case LP_INTERP_FACING: + tri->inputs.a0[input*4+0] = 1.0f - frontface; + tri->inputs.dadx[input*4+0] = 0.0; + tri->da[input].dady[0] = 0.0; break; default: @@ -255,7 
+263,7 @@ do_triangle_ccw(struct lp_setup *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct triangle *tri = allocate_triangle( setup ); + struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri ); float area; float c1, c2, c3; int i; -- cgit v1.2.3 From 1814395b7ef3506935ae1f12630cdd602e15cd55 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 22:46:21 +0100 Subject: llvmpipe: triangle struct owns its copy of shader inputs --- src/gallium/drivers/llvmpipe/lp_rast.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 28bb0a60ebc..64d668f9983 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -98,7 +98,7 @@ struct lp_rast_triangle { float dx31; /* inputs for the shader */ - struct lp_rast_shader_inputs *inputs; + struct lp_rast_shader_inputs inputs; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 86c785babb8..8cd3fcc360b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -48,7 +48,7 @@ static void block_full( struct lp_rasterizer *rast, int iy; for (iy = 0; iy < 8; iy += 2) - lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); } #else static void block_full( struct lp_rasterizer *rast, @@ -59,7 +59,7 @@ static void block_full( struct lp_rasterizer *rast, int iy; for (iy = 0; iy < 4; iy += 2) - lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); } #endif @@ -140,7 +140,7 @@ do_block( struct lp_rasterizer *rast, cx3 += xstep3; } - lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, 
&tri->inputs, x, y + iy, masks); c1 += ystep1; c2 += ystep2; -- cgit v1.2.3 From 253dfed93918bd87c4a55047a9d569ede545f8be Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 23:08:41 +0100 Subject: llvmpipe: get lp_setup compiling --- src/gallium/drivers/llvmpipe/lp_setup.c | 131 ++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup.h | 11 +- src/gallium/drivers/llvmpipe/lp_setup_context.h | 28 +++-- 3 files changed, 117 insertions(+), 53 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 57ac85468d8..9f1b3d21f09 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -35,6 +35,10 @@ #include "lp_setup_context.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "pipe/p_defines.h" + +static void set_state( struct setup_context *, unsigned ); void lp_setup_new_cmd_block( struct cmd_block_list *list ) { @@ -54,6 +58,37 @@ void lp_setup_new_data_block( struct data_block_list *list ) block->used = 0; } + +static void +first_triangle( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + set_state( setup, SETUP_ACTIVE ); + lp_setup_choose_triangle( setup ); + setup->triangle( setup, v0, v1, v2 ); +} + +static void +first_line( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + set_state( setup, SETUP_ACTIVE ); + lp_setup_choose_line( setup ); + setup->line( setup, v0, v1 ); +} + +static void +first_point( struct setup_context *setup, + const float (*v0)[4]) +{ + set_state( setup, SETUP_ACTIVE ); + lp_setup_choose_point( setup ); + setup->point( setup, v0 ); +} + static void reset_context( struct setup_context *setup ) { unsigned i, j; @@ -92,6 +127,13 @@ static void reset_context( struct setup_context *setup ) /* Reset some state: */ setup->clear.flags = 0; + + /* Have an explicit 
"start-binning" call and get rid of this + * pointer twiddling? + */ + setup->line = first_line; + setup->point = first_point; + setup->triangle = first_triangle; } @@ -119,11 +161,11 @@ rasterize_bins( struct setup_context *setup, unsigned i,j,k; lp_rast_bind_color( rast, - setup->fb.color, + setup->fb.cbuf, TRUE ); /* WRITE */ lp_rast_bind_depth( rast, - setup->fb.zstencil, + setup->fb.zsbuf, write_depth ); /* WRITE */ for (i = 0; i < setup->tiles_x; i++) { @@ -151,7 +193,7 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { - if (setup->fb.color) { + if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, @@ -162,7 +204,7 @@ begin_binning( struct setup_context *setup ) NULL ); } - if (setup->fb.zstencil) { + if (setup->fb.zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) bin_everywhere( setup, lp_rast_clear_zstencil, @@ -239,8 +281,8 @@ lp_setup_bind_framebuffer( struct setup_context *setup, set_state( setup, SETUP_FLUSHED ); - pipe_surface_reference( &setup->fb.color, color ); - pipe_surface_reference( &setup->fb.zstencil, zstencil ); + pipe_surface_reference( &setup->fb.cbuf, color ); + pipe_surface_reference( &setup->fb.zsbuf, zstencil ); width = MAX2( color->width, zstencil->width ); height = MAX2( color->height, zstencil->height ); @@ -251,44 +293,55 @@ lp_setup_bind_framebuffer( struct setup_context *setup, void lp_setup_clear( struct setup_context *setup, - const float *clear_color, - double clear_depth, - unsigned clear_stencil, + const float *color, + double depth, + unsigned stencil, unsigned flags ) { if (setup->state == SETUP_ACTIVE) { - struct lp_rast_clear_info *clear_info; + /* Add the clear to existing bins. In the unusual case where + * both color and depth-stencilare being cleared, we could + * discard the currently binned scene and start again, but I + * don't see that as being a common usage. 
+ */ + if (flags & PIPE_CLEAR_COLOR) { + union lp_rast_cmd_arg *arg = get_data( &setup->data, sizeof *arg ); - clear_info = alloc_clear_info( setup ); + util_pack_color(color, + setup->fb.cbuf->format, + &arg->clear_color ); - if (flags & PIPE_CLEAR_COLOR) { - pack_color( setup, - clear_info->color, - clear_color ); - bin_everywhere(setup, lp_rast_clear_color, clear_info ); + bin_everywhere(setup, lp_rast_clear_color, arg ); } - if (flags & PIPE_CLEAR_DEPTH_STENCIL) { - pack_depth_stencil( setup, - clear_info->depth, - clear_depth, - clear_stencil ); + if (flags & PIPE_CLEAR_DEPTHSTENCIL) { + union lp_rast_cmd_arg *arg = get_data( &setup->data, sizeof *arg ); + + arg->clear_zstencil = + util_pack_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); - bin_everywhere(setup, lp_rast_clear_zstencil, clear_info ); + bin_everywhere(setup, lp_rast_clear_zstencil, arg ); } } else { + /* Put ourselves into the 'pre-clear' state, specifically to try + * and accumulate multiple clears to color and depth_stencil + * buffers which the app or state-tracker might issue + * separately. 
+ */ set_state( setup, SETUP_CLEARED ); setup->clear.flags |= flags; if (flags & PIPE_CLEAR_COLOR) { - util_pack_color(rgba, + util_pack_color(color, setup->fb.cbuf->format, &setup->clear.color.clear_color ); } - if (flags & PIPE_CLEAR_DEPTH_STENCIL) { + if (flags & PIPE_CLEAR_DEPTHSTENCIL) { setup->clear.zstencil.clear_zstencil = util_pack_z_stencil(setup->fb.zsbuf->format, depth, @@ -300,28 +353,21 @@ lp_setup_clear( struct setup_context *setup, void lp_setup_set_fs_inputs( struct setup_context *setup, - const enum lp_interp *interp, + const struct lp_shader_input *input, unsigned nr ) { - memcpy( setup->interp, interp, nr * sizeof interp[0] ); + memcpy( setup->fs.input, input, nr * sizeof input[0] ); + setup->fs.nr_inputs = nr; } void lp_setup_set_shader_state( struct setup_context *setup, - const struct jit_context *jc ) + const struct lp_jit_context *jc ) { + } -static void -first_triangle( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]) -{ - set_state( setup, STATE_ACTIVE ); - setup_choose_triangle( setup, v0, v1, v2 ); -} @@ -352,7 +398,8 @@ lp_setup_tri(struct setup_context *setup, } -void setup_destroy_context( struct setup_context *setup ) +void +lp_setup_destroy( struct setup_context *setup ) { lp_rast_destroy( setup->rast ); FREE( setup ); @@ -363,18 +410,20 @@ void setup_destroy_context( struct setup_context *setup ) * Create a new primitive tiling engine. Currently also creates a * rasterizer to use with it. 
*/ -struct setup_context *setup_create_context( void ) +struct setup_context * +lp_setup_create( void ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); + unsigned i, j; - setup->rast = lp_rast_create( void ); + setup->rast = lp_rast_create(); if (!setup->rast) goto fail; for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) - setup->tile[i][j].first = - setup->tile[i][j].next = CALLOC_STRUCT(cmd_block); + setup->tile[i][j].head = + setup->tile[i][j].tail = CALLOC_STRUCT(cmd_block); return setup; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 7c813070b95..04f9f878926 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -37,8 +37,15 @@ enum lp_interp { LP_INTERP_FACING }; +struct lp_shader_input { + enum lp_interp interp; + unsigned vs_output; +}; + struct pipe_texture; +struct pipe_surface; struct setup_context; +struct lp_jit_context; struct setup_context * lp_setup_create( void ); @@ -83,12 +90,12 @@ lp_setup_set_triangle_state( struct setup_context *setup, void lp_setup_set_fs_inputs( struct setup_context *setup, - const enum lp_interp *interp, + const struct lp_shader_input *interp, unsigned nr ); void lp_setup_set_shader_state( struct setup_context *setup, - const struct jit_context *jc ); + const struct lp_jit_context *jc ); boolean lp_setup_is_texture_referenced( struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 5722e3e9de8..37caeed85fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -31,8 +31,17 @@ #include "lp_setup.h" #include "lp_rast.h" +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. 
+ */ +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 +#define TILES_X (MAXWIDTH / TILESIZE) +#define TILES_Y (MAXHEIGHT / TILESIZE) + #define CMD_BLOCK_MAX 128 #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) + /* switch to a non-pointer value for this: */ @@ -62,12 +71,6 @@ struct data_block_list { }; -/* We're limited to 2K by 2K for 32bit fixed point rasterization. - * Will need a 64-bit version for larger framebuffers. - */ -#define MAXHEIGHT 2048 -#define MAXWIDTH 2048 - struct setup_context { struct lp_rasterizer *rast; @@ -75,15 +78,15 @@ struct setup_context { /* When there are multiple threads, will want to double-buffer the * bin arrays: */ - struct cmd_block_list tile[MAXHEIGHT / TILESIZE][MAXWIDTH / TILESIZE]; + struct cmd_block_list tile[TILES_X][TILES_Y]; struct data_block_list data; unsigned tiles_x; unsigned tiles_y; struct { - struct pipe_surface *color; - struct pipe_surface *zstencil; + struct pipe_surface *cbuf; + struct pipe_surface *zsbuf; } fb; struct { @@ -99,7 +102,7 @@ struct setup_context { } state; struct { - enum lp_interp inputs[PIPE_MAX_ATTRIBS]; + struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; } fs; @@ -116,6 +119,11 @@ struct setup_context { const float (*v2)[4]); }; +void lp_setup_choose_triangle( struct setup_context *setup ); +void lp_setup_choose_line( struct setup_context *setup ); +void lp_setup_choose_point( struct setup_context *setup ); + + void lp_setup_new_data_block( struct data_block_list *list ); void lp_setup_new_cmd_block( struct cmd_block_list *list ); -- cgit v1.2.3 From de902d3275d1861beb0cebdf0807a17e2682c8de Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 10:23:53 +0100 Subject: llvmpipe: more wip on coefficients --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 206 ++++++++++++++-------------- 1 file changed, 103 insertions(+), 103 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index efd91124a09..382a52e9519 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -29,23 +29,23 @@ * Binning code for triangles */ -#include "lp_setup.h" -#include "lp_state.h" +#include "lp_setup_context.h" #include "util/u_math.h" #include "util/u_memory.h" +#define NUM_CHANNELS 4 /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ static void constant_coef( struct lp_rast_triangle *tri, - const float (*v3)[4], - unsigned vert_attr, - unsigned i ) + unsigned slot, + const float value, + unsigned i ) { - tri->inputs.a0[i] = v3[vert_attr][i]; - tri->inputs.dadx[i] = 0; - tri->inputs.dady[i] = 0; + tri->inputs.a0[slot][i] = value; + tri->inputs.dadx[slot][i] = 0; + tri->inputs.dady[slot][i] = 0; } /** @@ -53,45 +53,40 @@ static void constant_coef( struct lp_rast_triangle *tri, * for a triangle. */ static void linear_coef( struct lp_rast_triangle *tri, - unsigned input, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], - unsigned vert_attr) + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) { - unsigned i; - - input *= 4; - - for (i = 0; i < NUM_CHANNELS; i++) { - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; - float a3 = v3[vert_attr][i]; - - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - tri->inputs.dadx[input+i] = dadx; - tri->inputs.dady[input+i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). 
- * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - tri->inputs.a0[input+i] = (v1[vert_attr][i] - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); - } + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + tri->inputs.a0[slot][i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); } @@ -104,34 +99,29 @@ static void linear_coef( struct lp_rast_triangle *tri, * divide the interpolated value by the interpolated W at that fragment. 
*/ static void perspective_coef( struct lp_rast_triangle *tri, + unsigned slot, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], unsigned vert_attr, - unsigned i) + unsigned i) { - unsigned i; - - input *= 4; - - for (i = 0; i < NUM_CHANNELS; i++) { - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; - float a3 = v3[vert_attr][i] * v3[0][3]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - - tri->inputs.dadx[input+i] = dadx; - tri->inputs.dady[input+i] = dady; - tri->inputs.a0[input+i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); - } + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); } @@ -142,29 +132,37 @@ static void perspective_coef( struct lp_rast_triangle *tri, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
*/ static void -setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot) +setup_fragcoord_coef(struct lp_rast_triangle *tri, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4]) { - slot *= 4; - /*X*/ - tri->inputs.a0[slot+0] = 0.0; - tri->inputs.dadx[slot+0] = 1.0; - tri->inputs.dady[slot+0] = 0.0; + tri->inputs.a0[slot][0] = 0.0; + tri->inputs.dadx[slot][0] = 1.0; + tri->inputs.dady[slot][0] = 0.0; /*Y*/ - tri->inputs.a0[slot+1] = 0.0; - tri->inputs.dadx[slot+1] = 0.0; - tri->inputs.dady[slot+1] = 1.0; + tri->inputs.a0[slot][1] = 0.0; + tri->inputs.dadx[slot][1] = 0.0; + tri->inputs.dady[slot][1] = 1.0; /*Z*/ - tri->inputs.a0[slot+2] = tri->inputs.a0[2]; - tri->inputs.dadx[slot+2] = tri->inputs.dadx[2]; - tri->inputs.dady[slot+2] = tri->inputs.dady[2]; + linear_coef(tri, slot, v1, v2, v3, 0, 2); /*W*/ - tri->inputs.a0[slot+3] = tri->inputs.a0[3]; - tri->inputs.dadx[slot+3] = tri->inputs.dadx[3]; - tri->inputs.dady[slot+3] = tri->inputs.dady[3]; + linear_coef(tri, slot, v1, v2, v3, 0, 3); } +static void setup_facing_coef( struct lp_rast_triangle *tri, + unsigned slot, + boolean frontface ) +{ + constant_coef( tri, slot, 1.0f - frontface, 0 ); + constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ +} + /** * Compute the tri->coef[] array dadx, dady, a0 values. 
@@ -176,40 +174,42 @@ static void setup_tri_coefficients( struct setup_context *setup, const float (*v3)[4], boolean frontface ) { - unsigned input; + unsigned slot; - /* z and w are done by linear interpolation: + /* The internal position input is in slot zero: */ - setup_fragcoord_coef(tri, 0); - linear_coef(tri, input, v1, v2, v3, vert_attr, i); + setup_fragcoord_coef(tri, 0, v1, v2, v3); /* setup interpolation for all the remaining attrbutes: */ - for (input = 0; input < setup->fs.nr_inputs; input++) { - unsigned vert_attr = setup->fs.input[input].src_index; + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; unsigned i; - switch (setup->fs.input[input].interp_mode) { + switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: - constant_coef(tri, input, v3, vert_attr, i); + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(tri, slot+1, v3[vert_attr][i], i); break; case LP_INTERP_LINEAR: - linear_coef(tri, input, v1, v2, v3, vert_attr, i); + for (i = 0; i < NUM_CHANNELS; i++) + linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: - perspective_coef(tri, input, v1, v2, v3, vert_attr, i); + for (i = 0; i < NUM_CHANNELS; i++) + perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i); break; - case LP_INTERP_POS: - setup_fragcoord_coef(tri, input); + case LP_INTERP_POSITION: + /* XXX: fix me - duplicates the values in slot zero. 
+ */ + setup_fragcoord_coef(tri, slot+1, v1, v2, v3); break; case LP_INTERP_FACING: - tri->inputs.a0[input*4+0] = 1.0f - frontface; - tri->inputs.dadx[input*4+0] = 0.0; - tri->da[input].dady[0] = 0.0; + setup_facing_coef(tri, slot+1, frontface); break; default: @@ -246,14 +246,14 @@ static inline float subpixel_snap( float a ) #define MAX3(a,b,c) MAX2(MAX2(a,b),c) static void -do_triangle_ccw(struct lp_setup *setup, +do_triangle_ccw(struct setup_context *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { - const int rt_width = setup->framebuffer.cbufs[0]->width; - const int rt_height = setup->framebuffer.cbufs[0]->height; + const int rt_width = setup->fb.width; + const int rt_height = setup->fb.height; const float y1 = subpixel_snap(v1[0][1]); const float y2 = subpixel_snap(v2[0][1]); @@ -263,7 +263,7 @@ do_triangle_ccw(struct lp_setup *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri ); + struct lp_setup_triangle *tri = get_data( setup, sizeof *tri ); float area; float c1, c2, c3; int i; -- cgit v1.2.3 From 84ab7dcf48e87350c0622c533e51aa495f7256c2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 10:24:19 +0100 Subject: llvmpipe: calculate overall width and height, pass to rasterizer --- src/gallium/drivers/llvmpipe/lp_rast.c | 24 ++++++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 15 ++++++-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 4 ++ src/gallium/drivers/llvmpipe/lp_setup.c | 51 ++++++++++++++++++++----- src/gallium/drivers/llvmpipe/lp_setup.h | 10 ++++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 + 6 files changed, 84 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 695ddc089a4..6ac44feb4c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ 
b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -48,6 +48,17 @@ struct lp_rasterizer *lp_rast_create( void ) return rast; } + +void lp_rast_begin( struct lp_rasterizer *rast, + unsigned width, + unsigned height ) +{ + rast->width = width; + rast->height = height; + rast->check_for_clipped_tiles = (width % TILESIZE != 0 || + height % TILESIZE != 0); +} + void lp_rast_bind_color( struct lp_rasterizer *rast, struct pipe_surface *cbuf, boolean write_color ) @@ -195,8 +206,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, */ -void lp_rast_end_tile( struct lp_rasterizer *rast, - boolean write_depth ) +void lp_rast_end_tile( struct lp_rasterizer *rast ) { struct pipe_surface *surface; struct pipe_screen *screen; @@ -213,10 +223,10 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, screen = surface->texture->screen; - if(x + w > surface->width) - w = surface->width - x; - if(y + h > surface->height) - h = surface->height - y; + if(x + w > rast->width) + w = rast->width - x; + if(y + h > rast->height) + h = rast->height - y; transfer = screen->get_tex_transfer(screen, surface->texture, @@ -240,7 +250,7 @@ void lp_rast_end_tile( struct lp_rasterizer *rast, screen->tex_transfer_destroy(transfer); - if (write_depth) { + if (0) { /* FIXME: call u_tile func to store depth/stencil to surface */ } } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 64d668f9983..26d057beb29 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -97,6 +97,11 @@ struct lp_rast_triangle { float dx23; float dx31; + /* XXX: these are only used inside lp_setup_tri.c, don't really + * need to bin them: + */ + float oneoverarea; + /* inputs for the shader */ struct lp_rast_shader_inputs inputs; }; @@ -105,13 +110,17 @@ struct lp_rast_triangle { struct lp_rasterizer *lp_rast_create( void ); +void lp_rast_begin( struct lp_rasterizer *, + unsigned width, + unsigned height); + void lp_rast_bind_color( struct 
lp_rasterizer *, struct pipe_surface *cbuf, boolean write_when_done ); -void lp_rast_bind_depth( struct lp_rasterizer *, - struct pipe_surface *zsbuf, - boolean write_when_done ); +void lp_rast_bind_zstencil( struct lp_rasterizer *, + struct pipe_surface *zsbuf, + boolean write_when_done ); /* Begining of each tile: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 29e4c8fd800..d7a8b9c257d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -52,7 +52,11 @@ struct lp_rasterizer { unsigned x; unsigned y; + boolean clipped_tile; + boolean check_for_clipped_tiles; + unsigned width; + unsigned height; struct { struct pipe_surface *cbuf; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 9f1b3d21f09..4f100808165 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -160,13 +160,23 @@ rasterize_bins( struct setup_context *setup, struct cmd_block *block; unsigned i,j,k; + if (setup->state != SETUP_ACTIVE) { + /* this can happen, not a big deal */ + debug_printf("%s called when not binning\n", __FUNCTION__); + return; + } + + lp_rast_begin( rast, + setup->fb.width, + setup->fb.height ); + lp_rast_bind_color( rast, setup->fb.cbuf, - TRUE ); /* WRITE */ + setup->fb.cbuf != NULL ); - lp_rast_bind_depth( rast, - setup->fb.zsbuf, - write_depth ); /* WRITE */ + lp_rast_bind_zstencil( rast, + setup->fb.zsbuf, + setup->fb.zsbuf != NULL && write_depth ); for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { @@ -193,15 +203,38 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { + if (!setup->fb.cbuf && !setup->fb.zsbuf) { + setup->fb.width = 0; + setup->fb.height = 0; + } + else if (!setup->fb.zsbuf) { + setup->fb.width = setup->fb.cbuf->width; + setup->fb.height = setup->fb.cbuf->height; + } 
+ else if (!setup->fb.cbuf) { + setup->fb.width = setup->fb.zsbuf->width; + setup->fb.height = setup->fb.zsbuf->height; + } + else { + /* XXX: not sure what we're really supposed to do for + * mis-matched color & depth buffer sizes. + */ + setup->fb.width = MIN2(setup->fb.cbuf->width, + setup->fb.zsbuf->width); + setup->fb.height = MIN2(setup->fb.cbuf->height, + setup->fb.zsbuf->height); + } + + setup->tiles_x = align(setup->fb.width, TILESIZE); + setup->tiles_y = align(setup->fb.height, TILESIZE); + if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, &setup->clear.color ); else - bin_everywhere( setup, - lp_rast_load_color, - NULL ); + bin_everywhere( setup, lp_rast_load_color, NULL ); } if (setup->fb.zsbuf) { @@ -210,9 +243,7 @@ begin_binning( struct setup_context *setup ) lp_rast_clear_zstencil, &setup->clear.zstencil ); else - bin_everywhere( setup, - lp_rast_load_zstencil, - NULL ); + bin_everywhere( setup, lp_rast_load_zstencil, NULL ); } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 04f9f878926..bd439fa8578 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -37,9 +37,15 @@ enum lp_interp { LP_INTERP_FACING }; +/* Describes how to generate all the fragment shader inputs from the + * the vertices passed into our triangle/line/point functions. + * + * Vertices are treated as an array of float[4] values, indexed by + * src_index. 
+ */ struct lp_shader_input { - enum lp_interp interp; - unsigned vs_output; + enum lp_interp interp; /* how to interpolate values */ + unsigned src_index; /* where to find values in incoming vertices */ }; struct pipe_texture; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 37caeed85fd..7410ac70b81 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -87,6 +87,8 @@ struct setup_context { struct { struct pipe_surface *cbuf; struct pipe_surface *zsbuf; + unsigned width; + unsigned height; } fb; struct { -- cgit v1.2.3 From 47510040a68f5f672aee22eac6c01fb4dd60ec67 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 10:37:24 +0100 Subject: llvmpipe: Follow write_color/write_zstencil. --- src/gallium/drivers/llvmpipe/lp_rast.c | 24 ++++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 6 ------ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 ++ 3 files changed, 22 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6ac44feb4c7..9825099c945 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -64,6 +64,7 @@ void lp_rast_bind_color( struct lp_rasterizer *rast, boolean write_color ) { pipe_surface_reference(&rast->state.cbuf, cbuf); + rast->state.write_color = write_color; } void lp_rast_bind_zstencil( struct lp_rasterizer *rast, @@ -71,6 +72,7 @@ void lp_rast_bind_zstencil( struct lp_rasterizer *rast, boolean write_zstencil ) { pipe_surface_reference(&rast->state.zsbuf, zsbuf); + rast->state.write_zstencil = write_zstencil; } @@ -206,7 +208,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, */ -void lp_rast_end_tile( struct lp_rasterizer *rast ) +static void lp_rast_store_color( struct lp_rasterizer *rast ) { struct pipe_surface *surface; struct pipe_screen *screen; 
@@ -250,11 +252,25 @@ void lp_rast_end_tile( struct lp_rasterizer *rast ) screen->tex_transfer_destroy(transfer); - if (0) { - /* FIXME: call u_tile func to store depth/stencil to surface */ - } } + +static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +{ + /* FIXME: call u_tile func to store depth/stencil to surface */ +} + + +void lp_rast_end_tile( struct lp_rasterizer *rast ) +{ + if (rast->state.write_color) + lp_rast_store_color(rast); + + if (rast->state.write_zstencil) + lp_rast_store_zstencil(rast); +} + + /* Shutdown: */ void lp_rast_destroy( struct lp_rasterizer *rast ) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 26d057beb29..aa50fba5a60 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -163,12 +163,6 @@ void lp_rast_shade_tile( struct lp_rasterizer *, const union lp_rast_cmd_arg *, const struct lp_rast_shader_inputs *); -void lp_rast_store_color( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); - -void lp_rast_store_zstencil( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); - /* End of tile: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index d7a8b9c257d..f5a6699ed42 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -61,6 +61,8 @@ struct lp_rasterizer { struct { struct pipe_surface *cbuf; struct pipe_surface *zsbuf; + boolean write_color; + boolean write_zstencil; unsigned clear_color; unsigned clear_depth; char clear_stencil; -- cgit v1.2.3 From 415b271b5100d64579690111bc8eb549866865a7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 10:44:07 +0100 Subject: llvmpipe: hook up some state, add stub line and point functions --- src/gallium/drivers/llvmpipe/SConscript | 3 ++ src/gallium/drivers/llvmpipe/lp_setup.c | 23 +++++++++++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 9 +++-- 
src/gallium/drivers/llvmpipe/lp_setup_line.c | 47 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_point.c | 46 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 +++++------ 6 files changed, 133 insertions(+), 15 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_line.c create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_point.c (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index f6945535cad..3530e739cc4 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -48,6 +48,9 @@ llvmpipe = env.ConvenienceLibrary( 'lp_prim_vbuf.c', 'lp_query.c', 'lp_setup.c', + 'lp_setup_tri.c', + 'lp_setup_line.c', + 'lp_setup_point.c', 'lp_screen.c', 'lp_state_blend.c', 'lp_state_clip.c', diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 4f100808165..13b40f14942 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -148,7 +148,7 @@ static void bin_everywhere( struct setup_context *setup, unsigned i, j; for (i = 0; i < setup->tiles_x; i++) for (j = 0; j < setup->tiles_y; j++) - bin_cmd( &setup->tile[i][j], cmd, arg ); + bin_command( &setup->tile[i][j], cmd, arg ); } @@ -382,6 +382,19 @@ lp_setup_clear( struct setup_context *setup, } + +void +lp_setup_set_tri_state( struct setup_context *setup, + unsigned cull_mode, + boolean ccw_is_frontface) +{ + setup->ccw_is_frontface = ccw_is_frontface; + setup->cullmode = cull_mode; + setup->triangle = first_triangle; +} + + + void lp_setup_set_fs_inputs( struct setup_context *setup, const struct lp_shader_input *input, @@ -432,6 +445,14 @@ lp_setup_tri(struct setup_context *setup, void lp_setup_destroy( struct setup_context *setup ) { + unsigned i, j; + + reset_context( setup ); + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) + FREE(setup->tile[i][j].head); + 
lp_rast_destroy( setup->rast ); FREE( setup ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 7410ac70b81..9411f14cfbb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -83,6 +83,9 @@ struct setup_context { unsigned tiles_x; unsigned tiles_y; + + boolean ccw_is_frontface; + unsigned cullmode; struct { struct pipe_surface *cbuf; @@ -147,9 +150,9 @@ static INLINE void *get_data( struct data_block_list *list, /* Add a command to a given bin. */ -static INLINE void bin_cmd( struct cmd_block_list *list, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg *arg ) +static INLINE void bin_command( struct cmd_block_list *list, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg *arg ) { if (list->tail->count == CMD_BLOCK_MAX) { lp_setup_new_cmd_block( list ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c new file mode 100644 index 00000000000..feea79d3943 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for lines + */ + +#include "lp_setup_context.h" + +static void line_nop( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4] ) +{ +} + + +void +lp_setup_choose_line( struct setup_context *setup ) +{ + setup->line = line_nop; +} + + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c new file mode 100644 index 00000000000..f03ca729b24 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for points + */ + +#include "lp_setup_context.h" + +static void point_nop( struct setup_context *setup, + const float (*v0)[4] ) +{ +} + + +void +lp_setup_choose_point( struct setup_context *setup ) +{ + setup->point = point_nop; +} + + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 382a52e9519..d3b8ce94345 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -30,6 +30,7 @@ */ #include "lp_setup_context.h" +#include "lp_rast.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -263,10 +264,9 @@ do_triangle_ccw(struct setup_context *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct lp_setup_triangle *tri = get_data( setup, sizeof *tri ); + struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); float area; float c1, c2, c3; - int i; int minx, maxx, miny, maxy; tri->dx12 = x1 - x2; @@ -363,7 +363,7 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - bin_command(setup->tile[minx][miny], lp_rast_triangle, tri ); + bin_command( &setup->tile[minx][miny], lp_rast_triangle, tri ); } else { @@ -412,12 +412,12 @@ do_triangle_ccw(struct setup_context *setup, cx3 + ei3 > 0) { /* shade whole tile */ - bin_command(setup->tile[x][y], lp_rast_shade_tile, 
&tri->inputs ); + bin_command( &setup->tile[x][y], lp_rast_shade_tile, &tri->inputs ); } else { /* shade partial tile */ - bin_command(setup->tile[x][y], lp_rast_triangle, tri ); + bin_command( &setup->tile[x][y], lp_rast_triangle, tri ); } /* Iterate cx values across the region: @@ -477,13 +477,11 @@ static void triangle_nop( struct setup_context *setup, { } -void setup_set_tri_state( struct setup_context *setup, - unsigned cull_mode, - boolean ccw_is_frontface) -{ - setup->ccw_is_frontface = ccw_is_frontface; - switch (cull_mode) { +void +lp_setup_choose_triangle( struct setup_context *setup ) +{ + switch (setup->cull_mode) { case PIPE_WINDING_NONE: setup->triangle = triangle_both; break; -- cgit v1.2.3 From 4cdd10cb4b60d85f6c231a26739f7d5e264a05e5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 11:29:01 +0100 Subject: llvmpipe: use union lp_cmd_rast_arg directly, rather than through a pointer The union itself consists of pointers. We don't need to be passing pointer to pointers. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 21 ++++----- src/gallium/drivers/llvmpipe/lp_rast.h | 54 +++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 +- src/gallium/drivers/llvmpipe/lp_setup.c | 63 ++++++++++--------------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 6 +-- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 17 +++++-- 6 files changed, 98 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 9825099c945..de15ddbb2e4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -87,9 +87,9 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, } void lp_rast_clear_color( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { - const uint8_t *clear_color = arg->clear_color; + const uint8_t *clear_color = arg.clear_color; if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && @@ -106,25 +106,24 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, } void lp_rast_clear_zstencil( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg) + const union lp_rast_cmd_arg arg) { - const unsigned clear_zstencil = arg->clear_zstencil; unsigned i, j; for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) - rast->tile.depth[i*TILE_SIZE + j] = clear_zstencil; + rast->tile.depth[i*TILE_SIZE + j] = arg.clear_zstencil; } void lp_rast_load_color( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg) + const union lp_rast_cmd_arg arg) { /* call u_tile func to load colors from surface */ } void lp_rast_load_zstencil( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { /* call u_tile func to load depth (and stencil?) 
from surface */ } @@ -132,17 +131,17 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, /* Within a tile: */ void lp_rast_set_state( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { - rast->shader_state = arg->set_state; + rast->shader_state = arg.set_state; } void lp_rast_shade_tile( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg *arg, - const struct lp_rast_shader_inputs *inputs ) + const union lp_rast_cmd_arg arg ) { + const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned masks[4] = {~0, ~0, ~0, ~0}; unsigned x, y; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index aa50fba5a60..44cb4032dad 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -134,34 +134,70 @@ union lp_rast_cmd_arg { const struct lp_rast_shader_inputs *shade_tile; const struct lp_rast_triangle *triangle; const struct lp_rast_state *set_state; - const uint8_t clear_color[4]; + uint8_t clear_color[4]; unsigned clear_zstencil; }; +/* Cast wrappers. Hopefully these compile to noops! 
+ */ +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile ) +{ + union lp_rast_cmd_arg arg; + arg.shade_tile = shade_tile; + return arg; +} + +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_triangle( const struct lp_rast_triangle *triangle ) +{ + union lp_rast_cmd_arg arg; + arg.triangle = triangle; + return arg; +} + +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_state( const struct lp_rast_state *state ) +{ + union lp_rast_cmd_arg arg; + arg.set_state = state; + return arg; +} + +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_null( void ) +{ + union lp_rast_cmd_arg arg; + arg.set_state = NULL; + return arg; +} + + + + /* Binnable Commands: */ void lp_rast_clear_color( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); + const union lp_rast_cmd_arg ); void lp_rast_clear_zstencil( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); + const union lp_rast_cmd_arg ); void lp_rast_load_color( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); + const union lp_rast_cmd_arg ); void lp_rast_load_zstencil( struct lp_rasterizer *, - const union lp_rast_cmd_arg *); + const union lp_rast_cmd_arg ); void lp_rast_set_state( struct lp_rasterizer *, - const union lp_rast_cmd_arg * ); + const union lp_rast_cmd_arg ); void lp_rast_triangle( struct lp_rasterizer *, - const union lp_rast_cmd_arg * ); + const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer *, - const union lp_rast_cmd_arg *, - const struct lp_rast_shader_inputs *); + const union lp_rast_cmd_arg ); /* End of tile: diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 8cd3fcc360b..efc635bffea 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -155,9 +155,9 @@ do_block( struct lp_rasterizer *rast, * for this triangle: */ void lp_rast_triangle( struct lp_rasterizer *rast, - const union 
lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { - const struct lp_rast_triangle *tri = arg->triangle; + const struct lp_rast_triangle *tri = arg.triangle; int minx, maxx, miny, maxy; /* Clamp to tile dimensions: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 13b40f14942..c0c294fbe3f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -143,7 +143,7 @@ static void reset_context( struct setup_context *setup ) */ static void bin_everywhere( struct setup_context *setup, lp_rast_cmd cmd, - const union lp_rast_cmd_arg *arg ) + const union lp_rast_cmd_arg arg ) { unsigned i, j; for (i = 0; i < setup->tiles_x; i++) @@ -232,18 +232,18 @@ begin_binning( struct setup_context *setup ) if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, - &setup->clear.color ); + setup->clear.color ); else - bin_everywhere( setup, lp_rast_load_color, NULL ); + bin_everywhere( setup, lp_rast_load_color, lp_rast_arg_null() ); } if (setup->fb.zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) bin_everywhere( setup, lp_rast_clear_zstencil, - &setup->clear.zstencil ); + setup->clear.zstencil ); else - bin_everywhere( setup, lp_rast_load_zstencil, NULL ); + bin_everywhere( setup, lp_rast_load_zstencil, lp_rast_arg_null() ); } } @@ -329,32 +329,34 @@ lp_setup_clear( struct setup_context *setup, unsigned stencil, unsigned flags ) { + if (flags & PIPE_CLEAR_COLOR) { + util_pack_color(color, + setup->fb.cbuf->format, + &setup->clear.color.clear_color ); + } + + if (flags & PIPE_CLEAR_DEPTHSTENCIL) { + setup->clear.zstencil.clear_zstencil = + util_pack_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); + } + if (setup->state == SETUP_ACTIVE) { /* Add the clear to existing bins. 
In the unusual case where * both color and depth-stencilare being cleared, we could * discard the currently binned scene and start again, but I * don't see that as being a common usage. */ - if (flags & PIPE_CLEAR_COLOR) { - union lp_rast_cmd_arg *arg = get_data( &setup->data, sizeof *arg ); - - util_pack_color(color, - setup->fb.cbuf->format, - &arg->clear_color ); - - bin_everywhere(setup, lp_rast_clear_color, arg ); - } - - if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - union lp_rast_cmd_arg *arg = get_data( &setup->data, sizeof *arg ); + if (flags & PIPE_CLEAR_COLOR) + bin_everywhere( setup, + lp_rast_clear_color, + setup->clear.color ); - arg->clear_zstencil = - util_pack_z_stencil(setup->fb.zsbuf->format, - depth, - stencil); - - bin_everywhere(setup, lp_rast_clear_zstencil, arg ); - } + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) + bin_everywhere( setup, + lp_rast_clear_zstencil, + setup->clear.zstencil ); } else { /* Put ourselves into the 'pre-clear' state, specifically to try @@ -365,19 +367,6 @@ lp_setup_clear( struct setup_context *setup, set_state( setup, SETUP_CLEARED ); setup->clear.flags |= flags; - - if (flags & PIPE_CLEAR_COLOR) { - util_pack_color(color, - setup->fb.cbuf->format, - &setup->clear.color.clear_color ); - } - - if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - setup->clear.zstencil.clear_zstencil = - util_pack_z_stencil(setup->fb.zsbuf->format, - depth, - stencil); - } } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 9411f14cfbb..b29fec8ef05 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -45,11 +45,11 @@ /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg * ); +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); struct cmd_block { lp_rast_cmd cmd[CMD_BLOCK_MAX]; - const union lp_rast_cmd_arg 
*arg[CMD_BLOCK_MAX]; + union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; unsigned count; struct cmd_block *next; }; @@ -152,7 +152,7 @@ static INLINE void *get_data( struct data_block_list *list, */ static INLINE void bin_command( struct cmd_block_list *list, lp_rast_cmd cmd, - const union lp_rast_cmd_arg *arg ) + union lp_rast_cmd_arg arg ) { if (list->tail->count == CMD_BLOCK_MAX) { lp_setup_new_cmd_block( list ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index d3b8ce94345..f927f9df915 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -230,7 +230,10 @@ static inline float subpixel_snap( float a ) } - +static INLINE void bin_triangle( struct cmd_block_list *list, + const struct lp_rast_triangle arg ) +{ +} /* to avoid having to allocate power-of-four, square render targets, @@ -363,7 +366,8 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - bin_command( &setup->tile[minx][miny], lp_rast_triangle, tri ); + bin_command( &setup->tile[minx][miny], lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } else { @@ -412,12 +416,15 @@ do_triangle_ccw(struct setup_context *setup, cx3 + ei3 > 0) { /* shade whole tile */ - bin_command( &setup->tile[x][y], lp_rast_shade_tile, &tri->inputs ); + bin_command( &setup->tile[x][y], lp_rast_shade_tile, + lp_rast_arg_inputs(&tri->inputs) ); } else { /* shade partial tile */ - bin_command( &setup->tile[x][y], lp_rast_triangle, tri ); + bin_command( &setup->tile[x][y], + lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } /* Iterate cx values across the region: @@ -481,7 +488,7 @@ static void triangle_nop( struct setup_context *setup, void lp_setup_choose_triangle( struct setup_context *setup ) { - switch (setup->cull_mode) { + switch (setup->cullmode) { case PIPE_WINDING_NONE: setup->triangle = triangle_both; break; -- cgit v1.2.3 From e215f94f15fd20919cc0ed500dc2efde4f076516 Mon Sep 
17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 12:19:49 +0100 Subject: llvmpipe: get lp_setup_tri building --- src/gallium/drivers/llvmpipe/lp_rast.h | 14 +++++++++-- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 37 +++++++++++++++++------------ src/gallium/drivers/llvmpipe/lp_setup.c | 6 ++--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 12 +++++----- 4 files changed, 43 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 44cb4032dad..72f897503d6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -77,6 +77,11 @@ struct lp_rast_shader_inputs { * plus inputs to run the shader: */ struct lp_rast_triangle { + int minx; + int maxx; + int miny; + int maxy; + /* one-pixel sized trivial accept offsets for each plane */ float ei1; float ei2; @@ -97,8 +102,13 @@ struct lp_rast_triangle { float dx23; float dx31; - /* XXX: these are only used inside lp_setup_tri.c, don't really - * need to bin them: + /* edge function values at minx,miny ?? 
*/ + float c1; + float c2; + float c3; + + /* XXX: this is only used inside lp_setup_tri.c, don't really + * need it here: */ float oneoverarea; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index efc635bffea..7110afb9d57 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -158,21 +158,6 @@ void lp_rast_triangle( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const struct lp_rast_triangle *tri = arg.triangle; - int minx, maxx, miny, maxy; - - /* Clamp to tile dimensions: - */ - minx = MAX2(tri->maxx, rast->x); - miny = MAX2(tri->miny, rast->y); - maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); - maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); - - if (miny == maxy || - minx == maxx) { - debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); - //assert(0); - return; - } const int step = BLOCKSIZE; @@ -191,11 +176,33 @@ void lp_rast_triangle( struct lp_rasterizer *rast, float ystep1 = step * tri->dx12; float ystep2 = step * tri->dx23; float ystep3 = step * tri->dx31; + + /* Clamp to tile dimensions: + */ + int minx = MAX2(tri->maxx, rast->x); + int miny = MAX2(tri->miny, rast->y); + int maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); + int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); + int x, y; + float x0, y0; + float c1, c2, c3; + + if (miny == maxy || minx == maxx) { + debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); + return; + } minx &= ~(step-1); miny &= ~(step-1); + x0 = (float)minx; + y0 = (float)miny; + + c1 = tri->c1 + tri->dx12 * y0 - tri->dy12 * x0; + c2 = tri->c2 + tri->dx23 * y0 - tri->dy23 * x0; + c3 = tri->c3 + tri->dx31 * y0 - tri->dy31 * x0; + for (y = miny; y < maxy; y += step) { float cx1 = c1; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index c0c294fbe3f..56bbee1f7cb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ 
b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -373,9 +373,9 @@ lp_setup_clear( struct setup_context *setup, void -lp_setup_set_tri_state( struct setup_context *setup, - unsigned cull_mode, - boolean ccw_is_frontface) +lp_setup_set_triangle_state( struct setup_context *setup, + unsigned cull_mode, + boolean ccw_is_frontface) { setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index f927f9df915..5c402259df0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -315,9 +315,9 @@ do_triangle_ccw(struct setup_context *setup, /* half-edge constants, will be interated over the whole * rendertarget. */ - c1 = tri->dy12 * x1 - tri->dx12 * y1; - c2 = tri->dy23 * x2 - tri->dx23 * y2; - c3 = tri->dy31 * x3 - tri->dx31 * y3; + tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; + tri->c2 = tri->dy23 * x2 - tri->dx23 * y2; + tri->c3 = tri->dy31 * x3 - tri->dx31 * y3; /* correct for top-left fill convention: */ @@ -351,9 +351,9 @@ do_triangle_ccw(struct setup_context *setup, minx &= ~(TILESIZE-1); /* aligned blocks */ miny &= ~(TILESIZE-1); /* aligned blocks */ - c1 += tri->dx12 * miny - tri->dy12 * minx; - c2 += tri->dx23 * miny - tri->dy23 * minx; - c3 += tri->dx31 * miny - tri->dy31 * minx; + c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx; + c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; + c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; /* Convert to tile coordinates: */ -- cgit v1.2.3 From c1013f5d404880046f304de706d4216b08bd3011 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 12:26:49 +0100 Subject: llvmpipe: remove dead code --- src/gallium/drivers/llvmpipe/lp_flush.c | 1 + src/gallium/drivers/llvmpipe/lp_jit.c | 18 ------------------ src/gallium/drivers/llvmpipe/lp_jit.h | 4 ---- src/gallium/drivers/llvmpipe/lp_state_surface.c | 6 +++++- 4 files changed, 6 insertions(+), 
23 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index d0dd41f09c1..f7a1d897019 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -37,6 +37,7 @@ #include "lp_surface.h" #include "lp_state.h" #include "lp_winsys.h" +#include "lp_setup.h" void diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index a03eb874acb..fb6ec9bb37a 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -108,24 +108,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->context_ptr_type = LLVMPointerType(context_type, 0); } - /* fetch_texel - */ - { - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[3]; - LLVMValueRef fetch_texel; - - ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - arg_types[1] = LLVMInt32Type(); /* unit */ - arg_types[2] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* store */ - - fetch_texel = lp_declare_intrinsic(screen->module, "fetch_texel", - ret_type, arg_types, Elements(arg_types)); - - LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_fetch_texel_soa); - } - #ifdef DEBUG LLVMDumpModule(screen->module); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 207dfbfde10..7eccb5da859 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -118,10 +118,6 @@ typedef void void *color, void *depth); -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ); void diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index bb1396c3ab8..909ca9f1170 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -31,6 +31,7 @@ #include "lp_context.h" 
#include "lp_state.h" #include "lp_surface.h" +#include "lp_setup.h" #include "draw/draw_context.h" @@ -82,7 +83,10 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } if (dirty) { - lp_setup_set_framebuffer( lp->setup, fb ); + lp_setup_bind_framebuffer( lp->setup, + fb->cbufs[0], + fb->zsbuf ); + lp->dirty |= LP_NEW_FRAMEBUFFER; } } -- cgit v1.2.3 From c4d54b62f5491dbec9930563209639f8fb7dcf2e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 11:29:33 +0100 Subject: llvmpipe: Eliminate constant mapping/unmapping. --- src/gallium/drivers/llvmpipe/lp_context.h | 3 -- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 54 --------------------------- src/gallium/drivers/llvmpipe/lp_state_fs.c | 20 ++++++++-- 3 files changed, 17 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 852f7a1d05c..e34385bbae0 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -86,9 +86,6 @@ struct llvmpipe_context { /** Mapped vertex buffers */ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; - /** Mapped constant buffers */ - void *mapped_constants[PIPE_SHADER_TYPES]; - /** Vertex format */ struct vertex_info vertex_info; struct vertex_info vertex_info_vbuf; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 2bffcdb3ba5..b879b5e755e 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,54 +45,6 @@ -static void -llvmpipe_map_constant_buffers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - uint i, size; - - for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (lp->constants[i].buffer && lp->constants[i].buffer->size) - lp->mapped_constants[i] = screen->buffer_map(screen, lp->constants[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - } - - if 
(lp->constants[PIPE_SHADER_VERTEX].buffer) - size = lp->constants[PIPE_SHADER_VERTEX].buffer->size; - else - size = 0; - - lp->jit_context.constants = lp->mapped_constants[PIPE_SHADER_FRAGMENT]; - - draw_set_mapped_constant_buffer(lp->draw, - lp->mapped_constants[PIPE_SHADER_VERTEX], - size); -} - - -static void -llvmpipe_unmap_constant_buffers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - uint i; - - /* really need to flush all prims since the vert/frag shaders const buffers - * are going away now. - */ - draw_flush(lp->draw); - - draw_set_mapped_constant_buffer(lp->draw, NULL, 0); - - lp->jit_context.constants = NULL; - - for (i = 0; i < 2; i++) { - if (lp->constants[i].buffer && lp->constants[i].buffer->size) - screen->buffer_unmap(screen, lp->constants[i].buffer); - lp->mapped_constants[i] = NULL; - } -} - - boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -123,8 +75,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, if (lp->dirty) llvmpipe_update_derived( lp ); - llvmpipe_map_constant_buffers(lp); - /* * Map vertex buffers */ @@ -160,10 +110,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - - /* Note: leave drawing surfaces mapped */ - llvmpipe_unmap_constant_buffers(lp); - return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2c8b383123c..59c7afc6f78 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -83,6 +83,7 @@ #include "lp_bld_debug.h" #include "lp_screen.h" #include "lp_context.h" +#include "lp_buffer.h" #include "lp_state.h" #include "lp_tex_sample.h" @@ -670,16 +671,29 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const 
struct pipe_constant_buffer *constants) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct pipe_buffer *buffer = constants ? constants->buffer : NULL; + unsigned size = buffer ? buffer->size : 0; + const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + if(shader == PIPE_SHADER_VERTEX) + draw_flush(llvmpipe->draw); + /* note: reference counting */ - pipe_buffer_reference(&llvmpipe->constants[shader].buffer, - buf ? buf->buffer : NULL); + pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); + + if(shader == PIPE_SHADER_FRAGMENT) { + llvmpipe->jit_context.constants = data; + } + + if(shader == PIPE_SHADER_VERTEX) { + draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + } llvmpipe->dirty |= LP_NEW_CONSTANTS; } -- cgit v1.2.3 From 00ffef383c62ca6cd0d5687539dc45fecfbefeec Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 13:22:42 +0100 Subject: util: Force ESI register for cpuid's ebx result. Fixes a segfault and produces better code. Unfortunately, using an arbitrary register ("=r") causes gcc to abort when the code is optimized, saying it can't satisfy the constraint. Forcing ESI ("=S") seems to do the trick. --- src/gallium/auxiliary/util/u_cpu_detect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 70ce25cfcf4..ded361ce704 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -346,7 +346,7 @@ cpuid(uint32_t ax, uint32_t *p) "cpuid\n\t" "xchgl %%ebx, %1" : "=a" (p[0]), - "=m" (p[1]), + "=S" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax) -- cgit v1.2.3 From d904ed88c1d957f662497343de7dc3e9fa743e47 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 13:41:33 +0100 Subject: llvmpipe: Pass state to setup.
--- src/gallium/drivers/llvmpipe/lp_context.h | 3 +- src/gallium/drivers/llvmpipe/lp_setup.c | 99 +++++++++++++++++++++++-- src/gallium/drivers/llvmpipe/lp_setup.h | 23 +++++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 5 ++ src/gallium/drivers/llvmpipe/lp_state.h | 1 + src/gallium/drivers/llvmpipe/lp_state_blend.c | 18 ++--- src/gallium/drivers/llvmpipe/lp_state_derived.c | 18 +++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 7 +- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 10 --- 9 files changed, 150 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index e34385bbae0..17c6939ff5b 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -59,7 +59,7 @@ struct llvmpipe_context { const struct lp_vertex_shader *vs; /** Other rendering state */ - struct pipe_blend_color blend_color[4][16]; + struct pipe_blend_color blend_color; struct pipe_clip_state clip; struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; @@ -120,7 +120,6 @@ struct llvmpipe_context { unsigned tex_timestamp; boolean no_rast; - struct lp_jit_context jit_context; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 56bbee1f7cb..f999004a669 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -32,11 +32,15 @@ * lp_setup_flush(). 
*/ -#include "lp_setup_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" -#include "pipe/p_defines.h" +#include "lp_state.h" +#include "lp_buffer.h" +#include "lp_texture.h" +#include "lp_setup_context.h" static void set_state( struct setup_context *, unsigned ); @@ -394,14 +398,99 @@ lp_setup_set_fs_inputs( struct setup_context *setup, } void -lp_setup_set_shader_state( struct setup_context *setup, - const struct lp_jit_context *jc ) +lp_setup_set_fs( struct setup_context *setup, + struct lp_fragment_shader *fs ) { - + /* FIXME: reference count */ + + setup->fs.jit_function = fs->current->jit_function; } +void +lp_setup_set_fs_constants(struct setup_context *setup, + struct pipe_buffer *buffer) +{ + const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; + struct pipe_buffer *dummy; + /* FIXME: hold on to the reference */ + dummy = NULL; + pipe_buffer_reference(&dummy, buffer); + setup->fs.jit_context.constants = data; + + setup->fs.jit_context_dirty = TRUE; +} + + +void +lp_setup_set_alpha_ref_value( struct setup_context *setup, + float alpha_ref_value ) +{ + if(setup->fs.jit_context.alpha_ref_value != alpha_ref_value) { + setup->fs.jit_context.alpha_ref_value = alpha_ref_value; + setup->fs.jit_context_dirty = TRUE; + } +} + +void +lp_setup_set_blend_color( struct setup_context *setup, + const struct pipe_blend_color *blend_color ) +{ + unsigned i, j; + + if(!setup->fs.jit_context.blend_color) + setup->fs.jit_context.blend_color = align_malloc(4 * 16, 16); + + for (i = 0; i < 4; ++i) { + uint8_t c = float_to_ubyte(blend_color->color[i]); + for (j = 0; j < 16; ++j) + setup->fs.jit_context.blend_color[i*4 + j] = c; + } + + setup->fs.jit_context_dirty = TRUE; +} + +void +lp_setup_set_sampler_textures( struct setup_context *setup, + unsigned num, struct pipe_texture **texture) +{ + struct pipe_texture *dummy; + unsigned i; + + assert(num <= 
PIPE_MAX_SAMPLERS); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; + + /* FIXME: hold on to the reference */ + dummy = NULL; + pipe_texture_reference(&dummy, tex); + + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex = &setup->fs.jit_context.textures[i]; + jit_tex->width = tex->width[0]; + jit_tex->height = tex->height[0]; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + else + /* FIXME: map the rendertarget */ + assert(0); + } + } + + setup->fs.jit_context_dirty = TRUE; +} + +static void +lp_setup_set_shader_state( struct setup_context *setup, + const struct lp_jit_context *jc ) +{ + + +} /* Stubs for lines & points for now: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index bd439fa8578..ac9c3cc0ee9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -50,7 +50,9 @@ struct lp_shader_input { struct pipe_texture; struct pipe_surface; +struct pipe_blend_color; struct setup_context; +struct lp_fragment_shader; struct lp_jit_context; struct setup_context * @@ -100,8 +102,25 @@ lp_setup_set_fs_inputs( struct setup_context *setup, unsigned nr ); void -lp_setup_set_shader_state( struct setup_context *setup, - const struct lp_jit_context *jc ); +lp_setup_set_fs( struct setup_context *setup, + struct lp_fragment_shader *fs ); + +void +lp_setup_set_fs_constants(struct setup_context *setup, + struct pipe_buffer *buffer); + + +void +lp_setup_set_alpha_ref_value( struct setup_context *setup, + float alpha_ref_value ); + +void +lp_setup_set_blend_color( struct setup_context *setup, + const struct pipe_blend_color *blend_color ); + +void +lp_setup_set_sampler_textures( struct setup_context *setup, + unsigned num, struct pipe_texture **texture); boolean lp_setup_is_texture_referenced( struct setup_context *setup, diff --git 
a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index b29fec8ef05..2e2380dd806 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -109,6 +109,11 @@ struct setup_context { struct { struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; + + struct lp_jit_context jit_context; + lp_jit_frag_func jit_function; + + boolean jit_context_dirty; } fs; void (*point)( struct setup_context *, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index a9980d6f14a..64fe3600f5e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -54,6 +54,7 @@ #define LP_NEW_VERTEX 0x1000 #define LP_NEW_VS 0x2000 #define LP_NEW_QUERY 0x4000 +#define LP_NEW_BLEND_COLOR 0x8000 struct tgsi_sampler; diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 3f03bd00571..48afe5f5242 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -67,17 +67,16 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, const struct pipe_blend_color *blend_color ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - unsigned i, j; + + if(!blend_color) + return; + + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); - if(!llvmpipe->jit_context.blend_color) - llvmpipe->jit_context.blend_color = align_malloc(4 * 16, 16); - for (i = 0; i < 4; ++i) { - uint8_t c = float_to_ubyte(blend_color->color[i]); - for (j = 0; j < 16; ++j) - llvmpipe->jit_context.blend_color[i*4 + j] = c; - } + llvmpipe->dirty |= LP_NEW_BLEND_COLOR; } @@ -101,9 +100,6 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state 
*)depth_stencil; - if(llvmpipe->depth_stencil) - llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value; - llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index b801f054a2f..00903c8ef44 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -33,6 +33,7 @@ #include "draw/draw_private.h" #include "lp_context.h" #include "lp_screen.h" +#include "lp_setup.h" #include "lp_state.h" @@ -256,6 +257,23 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); + if (llvmpipe->dirty & (LP_NEW_BLEND | + LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_SAMPLER | + LP_NEW_TEXTURE)) + llvmpipe_update_fs( llvmpipe ); + + if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) + lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); + + if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) + lp_setup_set_alpha_ref_value(llvmpipe->setup, llvmpipe->depth_stencil->alpha.ref_value); + + if (llvmpipe->dirty & LP_NEW_CONSTANTS) + lp_setup_set_fs_constants(llvmpipe->setup, llvmpipe->constants[PIPE_SHADER_FRAGMENT].buffer); + + if (llvmpipe->dirty & LP_NEW_TEXTURE) + lp_setup_set_sampler_textures(llvmpipe->setup, llvmpipe->num_textures, llvmpipe->texture); llvmpipe->dirty = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 59c7afc6f78..63e675e5848 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -681,16 +681,15 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + if(llvmpipe->constants[shader].buffer == buffer) + return; + if(shader == PIPE_SHADER_VERTEX) draw_flush(llvmpipe->draw); /* note: reference counting */ 
pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); - if(shader == PIPE_SHADER_FRAGMENT) { - llvmpipe->jit_context.constants = data; - } - if(shader == PIPE_SHADER_VERTEX) { draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index ae787801eb6..e19394a4c92 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -96,16 +96,6 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, struct pipe_texture *tex = i < num ? texture[i] : NULL; pipe_texture_reference(&llvmpipe->texture[i], tex); - - if(tex) { - struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); - struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; - jit_tex->width = tex->width[0]; - jit_tex->height = tex->height[0]; - jit_tex->stride = lp_tex->stride[0]; - if(!lp_tex->dt) - jit_tex->data = lp_tex->data; - } } llvmpipe->num_textures = num; -- cgit v1.2.3 From c0e3e35b03e6cbed3768cb56e298b6119eafe1ef Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 13:44:24 +0100 Subject: llvmpipe: Add stub lp_setup_is_texture_referenced. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index f999004a669..0a12d93c38e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -484,6 +484,15 @@ lp_setup_set_sampler_textures( struct setup_context *setup, setup->fs.jit_context_dirty = TRUE; } +boolean +lp_setup_is_texture_referenced( struct setup_context *setup, + const struct pipe_texture *texture ) +{ + /* FIXME */ + return PIPE_UNREFERENCED; +} + + static void lp_setup_set_shader_state( struct setup_context *setup, const struct lp_jit_context *jc ) -- cgit v1.2.3 From 1928c965b1fb76987cbc834111bd1d1e1f2cda51 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 13:53:14 +0100 Subject: llvmpipe: fix a couple of warnings --- src/gallium/drivers/llvmpipe/lp_setup.c | 9 --------- src/gallium/drivers/llvmpipe/lp_setup.h | 1 + 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 0a12d93c38e..030a19ef301 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -493,15 +493,6 @@ lp_setup_is_texture_referenced( struct setup_context *setup, } -static void -lp_setup_set_shader_state( struct setup_context *setup, - const struct lp_jit_context *jc ) -{ - - -} - - /* Stubs for lines & points for now: */ void diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index ac9c3cc0ee9..0dedc9e9fe8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -50,6 +50,7 @@ struct lp_shader_input { struct pipe_texture; struct pipe_surface; +struct pipe_buffer; struct pipe_blend_color; struct setup_context; struct lp_fragment_shader; -- cgit v1.2.3 From 
b0475a4b0d1eaa1179bc399301ed46b0b8e63497 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 13:55:04 +0100 Subject: llvmpipe: fix crash on init --- src/gallium/drivers/llvmpipe/lp_setup.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 030a19ef301..2eef63badcb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -229,8 +229,8 @@ begin_binning( struct setup_context *setup ) setup->fb.zsbuf->height); } - setup->tiles_x = align(setup->fb.width, TILESIZE); - setup->tiles_y = align(setup->fb.height, TILESIZE); + setup->tiles_x = align(setup->fb.width, TILESIZE) / TILESIZE; + setup->tiles_y = align(setup->fb.height, TILESIZE) / TILESIZE; if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) @@ -312,18 +312,10 @@ lp_setup_bind_framebuffer( struct setup_context *setup, struct pipe_surface *color, struct pipe_surface *zstencil ) { - unsigned width, height; - set_state( setup, SETUP_FLUSHED ); pipe_surface_reference( &setup->fb.cbuf, color ); pipe_surface_reference( &setup->fb.zsbuf, zstencil ); - - width = MAX2( color->width, zstencil->width ); - height = MAX2( color->height, zstencil->height ); - - setup->tiles_x = align( width, TILESIZE ) / TILESIZE; - setup->tiles_y = align( height, TILESIZE ) / TILESIZE; } void -- cgit v1.2.3 From 4e1334ced68dd25b151250a44af25e8e0d5a33fe Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:02:39 +0100 Subject: llvmpipe: debug, crash fixes --- src/gallium/drivers/llvmpipe/lp_rast.c | 28 +++++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup.c | 6 ------ 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index de15ddbb2e4..fff292e294c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c 
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -33,6 +33,7 @@ #include "lp_tile_soa.h" #include "lp_bld_debug.h" +#define RAST_DEBUG debug_printf struct lp_rasterizer *lp_rast_create( void ) { @@ -53,6 +54,8 @@ void lp_rast_begin( struct lp_rasterizer *rast, unsigned width, unsigned height ) { + RAST_DEBUG("%s %dx%d\n", __FUNCTION__, width, height); + rast->width = width; rast->height = height; rast->check_for_clipped_tiles = (width % TILESIZE != 0 || @@ -63,6 +66,8 @@ void lp_rast_bind_color( struct lp_rasterizer *rast, struct pipe_surface *cbuf, boolean write_color ) { + RAST_DEBUG("%s\n", __FUNCTION__); + pipe_surface_reference(&rast->state.cbuf, cbuf); rast->state.write_color = write_color; } @@ -71,6 +76,8 @@ void lp_rast_bind_zstencil( struct lp_rasterizer *rast, struct pipe_surface *zsbuf, boolean write_zstencil ) { + RAST_DEBUG("%s\n", __FUNCTION__); + pipe_surface_reference(&rast->state.zsbuf, zsbuf); rast->state.write_zstencil = write_zstencil; } @@ -82,6 +89,8 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, unsigned y ) { + RAST_DEBUG("%s\n", __FUNCTION__); + rast->x = x; rast->y = y; } @@ -91,6 +100,8 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, { const uint8_t *clear_color = arg.clear_color; + RAST_DEBUG("%s\n", __FUNCTION__); + if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { @@ -110,6 +121,8 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, { unsigned i, j; + RAST_DEBUG("%s\n", __FUNCTION__); + for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) rast->tile.depth[i*TILE_SIZE + j] = arg.clear_zstencil; @@ -119,12 +132,16 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, void lp_rast_load_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) { + RAST_DEBUG("%s\n", __FUNCTION__); + /* call u_tile func to load colors from surface */ } void lp_rast_load_zstencil( struct lp_rasterizer *rast, const union 
lp_rast_cmd_arg arg ) { + RAST_DEBUG("%s\n", __FUNCTION__); + /* call u_tile func to load depth (and stencil?) from surface */ } @@ -133,8 +150,9 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_set_state( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { - rast->shader_state = arg.set_state; + RAST_DEBUG("%s\n", __FUNCTION__); + rast->shader_state = arg.set_state; } @@ -145,6 +163,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const unsigned masks[4] = {~0, ~0, ~0, ~0}; unsigned x, y; + RAST_DEBUG("%s\n", __FUNCTION__); + /* Use the existing preference for 8x2 (four quads) shading: */ for (y = 0; y < TILE_SIZE; y += 2) @@ -218,6 +238,8 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) unsigned h = TILE_SIZE; void *map; + RAST_DEBUG("%s\n", __FUNCTION__); + surface = rast->state.cbuf; if(!surface) return; @@ -256,12 +278,16 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) { + RAST_DEBUG("%s\n", __FUNCTION__); + /* FIXME: call u_tile func to store depth/stencil to surface */ } void lp_rast_end_tile( struct lp_rasterizer *rast ) { + RAST_DEBUG("%s\n", __FUNCTION__); + if (rast->state.write_color) lp_rast_store_color(rast); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 2eef63badcb..009c6419762 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -164,12 +164,6 @@ rasterize_bins( struct setup_context *setup, struct cmd_block *block; unsigned i,j,k; - if (setup->state != SETUP_ACTIVE) { - /* this can happen, not a big deal */ - debug_printf("%s called when not binning\n", __FUNCTION__); - return; - } - lp_rast_begin( rast, setup->fb.width, setup->fb.height ); -- cgit v1.2.3 From 659609e0ae27071a601794935c85547e315dedeb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 14:03:11 +0100 Subject: llvmpipe: Replace 
util_pack_color with straight float_to_ubyte. --- src/gallium/drivers/llvmpipe/lp_setup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 009c6419762..ec1027bb408 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -319,10 +319,11 @@ lp_setup_clear( struct setup_context *setup, unsigned stencil, unsigned flags ) { + unsigned i; + if (flags & PIPE_CLEAR_COLOR) { - util_pack_color(color, - setup->fb.cbuf->format, - &setup->clear.color.clear_color ); + for (i = 0; i < 4; ++i) + setup->clear.color.clear_color[i] = float_to_ubyte(color[i]); } if (flags & PIPE_CLEAR_DEPTHSTENCIL) { -- cgit v1.2.3 From 295aea04895676aae5b67a7016c62bab8e40b996 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:07:25 +0100 Subject: llvmpipe: more debug --- src/gallium/drivers/llvmpipe/lp_rast.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index fff292e294c..beb149ef188 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -89,7 +89,7 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, unsigned y ) { - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s %d,%d\n", __FUNCTION__, x, y); rast->x = x; rast->y = y; @@ -100,7 +100,11 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, { const uint8_t *clear_color = arg.clear_color; - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s %x,%x,%x,%x\n", __FUNCTION__, + clear_color[0], + clear_color[1], + clear_color[2], + clear_color[3]); if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && @@ -238,7 +242,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) unsigned h = TILE_SIZE; void *map; - RAST_DEBUG("%s\n", 
__FUNCTION__); + RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); surface = rast->state.cbuf; if(!surface) -- cgit v1.2.3 From e0e2008f1dcd73a59a184e0ef4c1dd77ac2a1cbf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:29:25 +0100 Subject: llvmpipe: trivial/clear works --- src/gallium/drivers/llvmpipe/lp_context.c | 2 +- src/gallium/drivers/llvmpipe/lp_rast.c | 120 ++++++++++++++-------------- src/gallium/drivers/llvmpipe/lp_rast.h | 21 +++-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 11 ++- src/gallium/drivers/llvmpipe/lp_setup.c | 16 ++-- src/gallium/drivers/llvmpipe/lp_setup.h | 3 +- 6 files changed, 92 insertions(+), 81 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 7f7b04412c2..06aa0325403 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -179,7 +179,7 @@ llvmpipe_create( struct pipe_screen *screen ) if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - llvmpipe->setup = lp_setup_create(); + llvmpipe->setup = lp_setup_create( screen ); if (!llvmpipe->setup) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index beb149ef188..977f35c46cd 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -35,7 +35,7 @@ #define RAST_DEBUG debug_printf -struct lp_rasterizer *lp_rast_create( void ) +struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) { struct lp_rasterizer *rast; @@ -43,6 +43,7 @@ struct lp_rasterizer *lp_rast_create( void ) if(!rast) return NULL; + rast->screen = screen; rast->tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); rast->tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); @@ -50,39 +51,75 @@ struct lp_rasterizer *lp_rast_create( void ) } -void lp_rast_begin( struct lp_rasterizer *rast, - unsigned width, - unsigned height ) 
+boolean lp_rast_begin( struct lp_rasterizer *rast, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf, + boolean write_color, + boolean write_zstencil, + unsigned width, + unsigned height ) { + struct pipe_screen *screen = rast->screen; + RAST_DEBUG("%s %dx%d\n", __FUNCTION__, width, height); + pipe_surface_reference(&rast->state.cbuf, cbuf); + pipe_surface_reference(&rast->state.zsbuf, zsbuf); + rast->width = width; rast->height = height; + rast->state.write_zstencil = write_zstencil; + rast->state.write_color = write_color; + rast->check_for_clipped_tiles = (width % TILESIZE != 0 || height % TILESIZE != 0); -} -void lp_rast_bind_color( struct lp_rasterizer *rast, - struct pipe_surface *cbuf, - boolean write_color ) -{ - RAST_DEBUG("%s\n", __FUNCTION__); + if (cbuf) { + rast->cbuf_transfer = screen->get_tex_transfer(rast->screen, + cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, width, height); + if (!rast->cbuf_transfer) + return FALSE; + + rast->cbuf_map = screen->transfer_map(rast->screen, + rast->cbuf_transfer); + if (!rast->cbuf_map) + return FALSE; + } - pipe_surface_reference(&rast->state.cbuf, cbuf); - rast->state.write_color = write_color; + return TRUE; } -void lp_rast_bind_zstencil( struct lp_rasterizer *rast, - struct pipe_surface *zsbuf, - boolean write_zstencil ) + +void lp_rast_end( struct lp_rasterizer *rast ) { - RAST_DEBUG("%s\n", __FUNCTION__); + struct pipe_screen *screen = rast->screen; - pipe_surface_reference(&rast->state.zsbuf, zsbuf); - rast->state.write_zstencil = write_zstencil; + if (rast->cbuf_map) + screen->transfer_unmap(screen, rast->cbuf_transfer); + + if (rast->zsbuf_map) + screen->transfer_unmap(screen, rast->zsbuf_transfer); + + if (rast->cbuf_transfer) + screen->tex_transfer_destroy(rast->cbuf_transfer); + + if (rast->zsbuf_transfer) + screen->tex_transfer_destroy(rast->cbuf_transfer); + + rast->cbuf_transfer = NULL; + rast->zsbuf_transfer = NULL; + rast->cbuf_map = NULL; 
+ rast->zsbuf_map = NULL; } + + /* Begining of each tile: */ void lp_rast_start_tile( struct lp_rasterizer *rast, @@ -233,50 +270,17 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, static void lp_rast_store_color( struct lp_rasterizer *rast ) { - struct pipe_surface *surface; - struct pipe_screen *screen; - struct pipe_transfer *transfer; const unsigned x = rast->x; const unsigned y = rast->y; - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - void *map; - - RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); - - surface = rast->state.cbuf; - if(!surface) - return; - - screen = surface->texture->screen; - - if(x + w > rast->width) - w = rast->width - x; - if(y + h > rast->height) - h = rast->height - y; - - transfer = screen->get_tex_transfer(screen, - surface->texture, - surface->face, - surface->level, - surface->zslice, - PIPE_TRANSFER_READ_WRITE, - x, y, w, h); - if(!transfer) - return; - - map = screen->transfer_map(screen, transfer); - if(map) { - lp_tile_write_4ub(transfer->format, - rast->tile.color, - map, transfer->stride, - x, y, w, h); - - screen->transfer_unmap(screen, transfer); - } - screen->tex_transfer_destroy(transfer); + RAST_DEBUG("%s %d,%d\n", __FUNCTION__, x, y); + lp_tile_write_4ub(rast->cbuf_transfer->format, + rast->tile.color, + rast->cbuf_map, + rast->cbuf_transfer->stride, + x, y, + TILESIZE, TILESIZE); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 72f897503d6..9dfdf25cda4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -37,6 +37,7 @@ * individual function calls like this. 
*/ struct lp_rasterizer; +struct pipe_screen; #define TILESIZE 64 @@ -118,19 +119,17 @@ struct lp_rast_triangle { -struct lp_rasterizer *lp_rast_create( void ); +struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ); -void lp_rast_begin( struct lp_rasterizer *, - unsigned width, - unsigned height); +boolean lp_rast_begin( struct lp_rasterizer *rast, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf, + boolean write_color, + boolean write_zstencil, + unsigned width, + unsigned height ); -void lp_rast_bind_color( struct lp_rasterizer *, - struct pipe_surface *cbuf, - boolean write_when_done ); - -void lp_rast_bind_zstencil( struct lp_rasterizer *, - struct pipe_surface *zsbuf, - boolean write_when_done ); +void lp_rast_end( struct lp_rasterizer * ); /* Begining of each tile: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index f5a6699ed42..eae8138aaf1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -30,6 +30,8 @@ #include "lp_rast.h" +struct pipe_transfer; +struct pipe_screen; /* We can choose whatever layout for the internal tile storage we * prefer: @@ -49,7 +51,6 @@ struct lp_rasterizer { */ struct lp_rast_tile tile; - unsigned x; unsigned y; boolean clipped_tile; @@ -57,7 +58,13 @@ struct lp_rasterizer { boolean check_for_clipped_tiles; unsigned width; unsigned height; - + + struct pipe_screen *screen; + struct pipe_transfer *cbuf_transfer; + struct pipe_transfer *zsbuf_transfer; + void *cbuf_map; + void *zsbuf_map; + struct { struct pipe_surface *cbuf; struct pipe_surface *zsbuf; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ec1027bb408..ba9d8010324 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -165,16 +165,14 @@ rasterize_bins( struct setup_context *setup, unsigned i,j,k; lp_rast_begin( rast, + setup->fb.cbuf, + 
setup->fb.zsbuf, + setup->fb.cbuf != NULL, + setup->fb.zsbuf != NULL && write_depth, setup->fb.width, setup->fb.height ); - lp_rast_bind_color( rast, - setup->fb.cbuf, - setup->fb.cbuf != NULL ); - lp_rast_bind_zstencil( rast, - setup->fb.zsbuf, - setup->fb.zsbuf != NULL && write_depth ); for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { @@ -193,6 +191,8 @@ rasterize_bins( struct setup_context *setup, } } + lp_rast_end( rast ); + reset_context( setup ); } @@ -528,12 +528,12 @@ lp_setup_destroy( struct setup_context *setup ) * rasterizer to use with it. */ struct setup_context * -lp_setup_create( void ) +lp_setup_create( struct pipe_screen *screen ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); unsigned i, j; - setup->rast = lp_rast_create(); + setup->rast = lp_rast_create( screen ); if (!setup->rast) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 0dedc9e9fe8..1edd7410fc0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -52,12 +52,13 @@ struct pipe_texture; struct pipe_surface; struct pipe_buffer; struct pipe_blend_color; +struct pipe_screen; struct setup_context; struct lp_fragment_shader; struct lp_jit_context; struct setup_context * -lp_setup_create( void ); +lp_setup_create( struct pipe_screen *screen ); void lp_setup_clear(struct setup_context *setup, -- cgit v1.2.3 From 608c22272327d3b554c7665b60f6322716e5fd9d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 14:30:25 +0100 Subject: llvmpipe: Put jit_context in store. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 26 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 ++ 2 files changed, 28 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ba9d8010324..8a9c1696345 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -480,12 +480,36 @@ lp_setup_is_texture_referenced( struct setup_context *setup, } +static INLINE void +lp_setup_update_shader_state( struct setup_context *setup ) +{ + + if(setup->fs.jit_context_dirty) { + if(!setup->fs.last_jc || + memcmp(setup->fs.last_jc, &setup->fs.jit_context, sizeof *setup->fs.last_jc)) { + struct lp_jit_context *jc; + + jc = get_data(&setup->data, sizeof *jc); + if(jc) { + memcpy(jc, &setup->fs.jit_context, sizeof *jc); + setup->fs.last_jc = jc; + } + } + + setup->fs.jit_context_dirty = FALSE; + } + + assert(setup->fs.last_jc); +} + + /* Stubs for lines & points for now: */ void lp_setup_point(struct setup_context *setup, const float (*v0)[4]) { + lp_setup_update_shader_state(setup); setup->point( setup, v0 ); } @@ -494,6 +518,7 @@ lp_setup_line(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { + lp_setup_update_shader_state(setup); setup->line( setup, v0, v1 ); } @@ -503,6 +528,7 @@ lp_setup_tri(struct setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { + lp_setup_update_shader_state(setup); setup->triangle( setup, v0, v1, v2 ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 2e2380dd806..747e90fe202 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -114,6 +114,8 @@ struct setup_context { lp_jit_frag_func jit_function; boolean jit_context_dirty; + + const struct lp_jit_context *last_jc; } fs; void (*point)( struct setup_context *, -- cgit v1.2.3 From 
163a31952c903034c8a70213b344e1b2ef287270 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:33:57 +0100 Subject: llvmpipe: initialize setup line/tri/point funcs --- src/gallium/drivers/llvmpipe/lp_setup.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 8a9c1696345..47839869ac6 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -568,6 +568,10 @@ lp_setup_create( struct pipe_screen *screen ) setup->tile[i][j].head = setup->tile[i][j].tail = CALLOC_STRUCT(cmd_block); + setup->triangle = first_triangle; + setup->line = first_line; + setup->point = first_point; + return setup; fail: -- cgit v1.2.3 From 082b3b0a895615a60a7eae40fea14bf231960dba Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:36:41 +0100 Subject: llvmpipe: initialize setup data store --- src/gallium/drivers/llvmpipe/lp_setup.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 47839869ac6..336a8b4e5b5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -568,6 +568,9 @@ lp_setup_create( struct pipe_screen *screen ) setup->tile[i][j].head = setup->tile[i][j].tail = CALLOC_STRUCT(cmd_block); + setup->data.head = + setup->data.tail = CALLOC_STRUCT(data_block); + setup->triangle = first_triangle; setup->line = first_line; setup->point = first_point; -- cgit v1.2.3 From dec35d04aeb398eef159aaf8cde5e0d04622b811 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 14:59:35 +0100 Subject: llvmpipe: add LP_DEBUG env var --- src/gallium/drivers/llvmpipe/lp_debug.h | 71 ++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_screen.c | 22 +++++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 67 +++++++++++++--------------- 3 files 
changed, 124 insertions(+), 36 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_debug.h (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h new file mode 100644 index 00000000000..74b27574942 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_DEBUG_H +#define LP_DEBUG_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + +extern void +st_print_current(void); + + +#define DEBUG_PIPE 0x1 +#define DEBUG_TGSI 0x2 +#define DEBUG_TEX 0x4 +#define DEBUG_ASM 0x8 +#define DEBUG_SETUP 0x10 +#define DEBUG_RAST 0x20 +#define DEBUG_QUERY 0x40 +#define DEBUG_SCREEN 0x80 +#define DEBUG_JIT 0x100 + +#ifdef DEBUG +extern int LP_DEBUG; +#else +#define LP_DEBUG 0 +#endif + +void st_debug_init( void ); + +static INLINE void +LP_DBG( unsigned flag, const char *fmt, ... ) +{ + if (LP_DEBUG & flag) + { + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + } +} + + +#endif /* LP_DEBUG_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 05189274589..87fddbd13f1 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -35,6 +35,24 @@ #include "lp_winsys.h" #include "lp_jit.h" #include "lp_screen.h" +#include "lp_debug.h" + +#ifdef DEBUG +int LP_DEBUG = 0; + +static const struct debug_named_value lp_debug_flags[] = { + { "pipe", DEBUG_PIPE }, + { "tgsi", DEBUG_TGSI }, + { "tex", DEBUG_TEX }, + { "asm", DEBUG_ASM }, + { "setup", DEBUG_SETUP }, + { "rast", DEBUG_RAST }, + { "query", DEBUG_QUERY }, + { "screen", DEBUG_SCREEN }, + { "jit", DEBUG_JIT }, + {NULL, 0} +}; +#endif static const char * @@ -213,6 +231,10 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys) { struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen); +#ifdef DEBUG + LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 ); +#endif + if (!screen) return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 63e675e5848..a12581a4868 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ 
-86,6 +86,7 @@ #include "lp_buffer.h" #include "lp_state.h" #include "lp_tex_sample.h" +#include "lp_debug.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -396,29 +397,29 @@ generate_fragment(struct llvmpipe_context *lp, unsigned i; unsigned chan; -#ifdef DEBUG - tgsi_dump(shader->base.tokens, 0); - if(key->depth.enabled) { - debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); - debug_printf("depth.writemask = %u\n", key->depth.writemask); - } - if(key->alpha.enabled) { - debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); - debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); - } - if(key->blend.logicop_enable) { - debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); - } - else if(key->blend.blend_enable) { - debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + if (LP_DEBUG & DEBUG_JIT) { + tgsi_dump(shader->base.tokens, 0); + if(key->depth.enabled) { + debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); + debug_printf("depth.writemask = %u\n", key->depth.writemask); + } + if(key->alpha.enabled) { + debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); + debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); + } + if(key->blend.logicop_enable) { + debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); + } + else if(key->blend.blend_enable) { + debug_printf("blend.rgb_func = %s\n", 
debug_dump_blend_func (key->blend.rgb_func, TRUE)); + debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); + debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); + debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); + debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); + debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + } + debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); } - debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); -#endif variant = CALLOC_STRUCT(lp_fragment_shader_variant); if(!variant) @@ -509,13 +510,8 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); -#if 0 - /* C texture sampling */ - sampler = lp_c_sampler_soa_create(context_ptr); -#else /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); -#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -558,8 +554,8 @@ generate_fragment(struct llvmpipe_context *lp, } lp_build_conv_mask(builder, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); + fs_mask, num_fs, + &blend_mask, 1); /* * Blending. 
@@ -588,16 +584,15 @@ generate_fragment(struct llvmpipe_context *lp, LLVMRunFunctionPassManager(screen->pass, variant->function); -#ifdef DEBUG - LLVMDumpValue(variant->function); - debug_printf("\n"); -#endif + if (LP_DEBUG & DEBUG_JIT) { + LLVMDumpValue(variant->function); + debug_printf("\n"); + } variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); -#ifdef DEBUG - lp_disassemble(variant->jit_function); -#endif + if (LP_DEBUG & DEBUG_ASM) + lp_disassemble(variant->jit_function); variant->next = shader->variants; shader->variants = variant; -- cgit v1.2.3 From 402c189af7e95be99ba2e5fd71a71987ffd73c2f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 15:07:18 +0100 Subject: llvmpipe: always call begin_binning on transition to active state --- src/gallium/drivers/llvmpipe/lp_setup.c | 39 +++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 336a8b4e5b5..793b71e0953 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -42,6 +42,8 @@ #include "lp_texture.h" #include "lp_setup_context.h" +#define SETUP_DEBUG debug_printf + static void set_state( struct setup_context *, unsigned ); void lp_setup_new_cmd_block( struct cmd_block_list *list ) @@ -97,6 +99,8 @@ static void reset_context( struct setup_context *setup ) { unsigned i, j; + SETUP_DEBUG("%s\n", __FUNCTION__); + /* Free binner command lists: */ for (i = 0; i < setup->tiles_x; i++) { @@ -164,6 +168,8 @@ rasterize_bins( struct setup_context *setup, struct cmd_block *block; unsigned i,j,k; + SETUP_DEBUG("%s\n", __FUNCTION__); + lp_rast_begin( rast, setup->fb.cbuf, setup->fb.zsbuf, @@ -201,6 +207,8 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + if 
(!setup->fb.cbuf && !setup->fb.zsbuf) { setup->fb.width = 0; setup->fb.height = 0; @@ -254,6 +262,8 @@ begin_binning( struct setup_context *setup ) static void execute_clears( struct setup_context *setup ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + begin_binning( setup ); rasterize_bins( setup, TRUE ); } @@ -268,10 +278,11 @@ set_state( struct setup_context *setup, if (old_state == new_state) return; + SETUP_DEBUG("%s old %d new %d\n", __FUNCTION__, old_state, new_state); + switch (new_state) { case SETUP_ACTIVE: - if (old_state == SETUP_FLUSHED) - begin_binning( setup ); + begin_binning( setup ); break; case SETUP_CLEARED: @@ -297,6 +308,8 @@ void lp_setup_flush( struct setup_context *setup, unsigned flags ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + set_state( setup, SETUP_FLUSHED ); } @@ -306,6 +319,8 @@ lp_setup_bind_framebuffer( struct setup_context *setup, struct pipe_surface *color, struct pipe_surface *zstencil ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + set_state( setup, SETUP_FLUSHED ); pipe_surface_reference( &setup->fb.cbuf, color ); @@ -321,6 +336,9 @@ lp_setup_clear( struct setup_context *setup, { unsigned i; + SETUP_DEBUG("%s state %d\n", __FUNCTION__, setup->state); + + if (flags & PIPE_CLEAR_COLOR) { for (i = 0; i < 4; ++i) setup->clear.color.clear_color[i] = float_to_ubyte(color[i]); @@ -368,6 +386,8 @@ lp_setup_set_triangle_state( struct setup_context *setup, unsigned cull_mode, boolean ccw_is_frontface) { + SETUP_DEBUG("%s\n", __FUNCTION__); + setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; setup->triangle = first_triangle; @@ -380,6 +400,8 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const struct lp_shader_input *input, unsigned nr ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + memcpy( setup->fs.input, input, nr * sizeof input[0] ); setup->fs.nr_inputs = nr; } @@ -388,6 +410,7 @@ void lp_setup_set_fs( struct setup_context *setup, struct lp_fragment_shader *fs ) { + SETUP_DEBUG("%s\n", __FUNCTION__); /* FIXME: 
reference count */ setup->fs.jit_function = fs->current->jit_function; @@ -400,6 +423,8 @@ lp_setup_set_fs_constants(struct setup_context *setup, const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; struct pipe_buffer *dummy; + SETUP_DEBUG("%s\n", __FUNCTION__); + /* FIXME: hold on to the reference */ dummy = NULL; pipe_buffer_reference(&dummy, buffer); @@ -414,6 +439,8 @@ void lp_setup_set_alpha_ref_value( struct setup_context *setup, float alpha_ref_value ) { + SETUP_DEBUG("%s\n", __FUNCTION__); + if(setup->fs.jit_context.alpha_ref_value != alpha_ref_value) { setup->fs.jit_context.alpha_ref_value = alpha_ref_value; setup->fs.jit_context_dirty = TRUE; @@ -426,6 +453,8 @@ lp_setup_set_blend_color( struct setup_context *setup, { unsigned i, j; + SETUP_DEBUG("%s\n", __FUNCTION__); + if(!setup->fs.jit_context.blend_color) setup->fs.jit_context.blend_color = align_malloc(4 * 16, 16); @@ -445,6 +474,9 @@ lp_setup_set_sampler_textures( struct setup_context *setup, struct pipe_texture *dummy; unsigned i; + SETUP_DEBUG("%s\n", __FUNCTION__); + + assert(num <= PIPE_MAX_SAMPLERS); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { @@ -483,6 +515,7 @@ lp_setup_is_texture_referenced( struct setup_context *setup, static INLINE void lp_setup_update_shader_state( struct setup_context *setup ) { + SETUP_DEBUG("%s\n", __FUNCTION__); if(setup->fs.jit_context_dirty) { if(!setup->fs.last_jc || @@ -528,6 +561,8 @@ lp_setup_tri(struct setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { + SETUP_DEBUG("%s\n", __FUNCTION__); + lp_setup_update_shader_state(setup); setup->triangle( setup, v0, v1, v2 ); } -- cgit v1.2.3 From da1808ccc9a53fdd5aa69efa800ece0d3d075b07 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 15:10:15 +0100 Subject: llvmpipe: just bin whole tiles for now --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 2 ++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'src') diff --git 
a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 7110afb9d57..1041cd2463c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -188,6 +188,8 @@ void lp_rast_triangle( struct lp_rasterizer *rast, float x0, y0; float c1, c2, c3; + debug_printf("%s\n", __FUNCTION__); + if (miny == maxy || minx == maxx) { debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); return; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 5c402259df0..857fb6a9f88 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -421,10 +421,15 @@ do_triangle_ccw(struct setup_context *setup, } else { +#if 1 + bin_command( &setup->tile[x][y], lp_rast_shade_tile, + lp_rast_arg_inputs(&tri->inputs) ); +#else /* shade partial tile */ bin_command( &setup->tile[x][y], lp_rast_triangle, lp_rast_arg_triangle(tri) ); +#endif } /* Iterate cx values across the region: -- cgit v1.2.3 From f406ffaea62005157f56ea17709291326c4dca8a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 15:29:10 +0100 Subject: llvmpipe: set block count/used values back to zero on reset --- src/gallium/drivers/llvmpipe/lp_setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 793b71e0953..03c54798dca 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -115,6 +115,7 @@ static void reset_context( struct setup_context *setup ) } list->head = list->tail; + list->head->count = 0; } } @@ -130,6 +131,7 @@ static void reset_context( struct setup_context *setup ) } list->head = list->tail; + list->head->used = 0; } /* Reset some state: -- cgit v1.2.3 From 85999695829823e459e11822b4846ed1db5c055d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: 
Fri, 9 Oct 2009 15:52:18 +0100 Subject: llvmpipe: Get jit_context/jit_function across the rasterizer. --- src/gallium/drivers/llvmpipe/lp_rast.c | 28 +++++-------- src/gallium/drivers/llvmpipe/lp_rast.h | 4 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 - src/gallium/drivers/llvmpipe/lp_setup.c | 53 ++++++++++++++----------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 9 ++--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 ++ 7 files changed, 50 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 977f35c46cd..cba50c80491 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -188,14 +188,6 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, /* Within a tile: */ -void lp_rast_set_state( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg arg ) -{ - RAST_DEBUG("%s\n", __FUNCTION__); - - rast->shader_state = arg.set_state; -} - void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) @@ -219,7 +211,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned x, unsigned y, const unsigned *masks) { - const struct lp_rast_state *state = rast->shader_state; + const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; @@ -249,17 +241,17 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, assert(lp_check_alignment(depth, 16)); assert(lp_check_alignment(color, 16)); - assert(lp_check_alignment(state->jc.blend_color, 16)); + assert(lp_check_alignment(state->jit_context.blend_color, 16)); /* run shader */ - state->shader( &state->jc, - x, y, - inputs->a0, - inputs->dadx, - inputs->dady, - &mask[0][0], - color, - depth); + state->jit_function( &state->jit_context, + x, y, + inputs->a0, + inputs->dadx, + inputs->dady, + &mask[0][0], + color, + depth); } diff --git 
a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 9dfdf25cda4..f371b709df3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -45,12 +45,12 @@ struct pipe_screen; struct lp_rast_state { /* State for the shader: */ - struct lp_jit_context jc; + struct lp_jit_context jit_context; /* The shader itself. Probably we also need to pass a pointer to * the tile color/z/stencil data somehow: */ - lp_jit_frag_func shader; + lp_jit_frag_func jit_function; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index eae8138aaf1..11e8e78e798 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -74,8 +74,6 @@ struct lp_rasterizer { unsigned clear_depth; char clear_stencil; } state; - - const struct lp_rast_state *shader_state; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 03c54798dca..428d2d00850 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -415,7 +415,7 @@ lp_setup_set_fs( struct setup_context *setup, SETUP_DEBUG("%s\n", __FUNCTION__); /* FIXME: reference count */ - setup->fs.jit_function = fs->current->jit_function; + setup->fs.current.jit_function = fs ? 
fs->current->jit_function : NULL; } void @@ -431,9 +431,9 @@ lp_setup_set_fs_constants(struct setup_context *setup, dummy = NULL; pipe_buffer_reference(&dummy, buffer); - setup->fs.jit_context.constants = data; + setup->fs.current.jit_context.constants = data; - setup->fs.jit_context_dirty = TRUE; + setup->fs.dirty = TRUE; } @@ -443,9 +443,9 @@ lp_setup_set_alpha_ref_value( struct setup_context *setup, { SETUP_DEBUG("%s\n", __FUNCTION__); - if(setup->fs.jit_context.alpha_ref_value != alpha_ref_value) { - setup->fs.jit_context.alpha_ref_value = alpha_ref_value; - setup->fs.jit_context_dirty = TRUE; + if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { + setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; + setup->fs.dirty = TRUE; } } @@ -457,16 +457,16 @@ lp_setup_set_blend_color( struct setup_context *setup, SETUP_DEBUG("%s\n", __FUNCTION__); - if(!setup->fs.jit_context.blend_color) - setup->fs.jit_context.blend_color = align_malloc(4 * 16, 16); + if(!setup->fs.current.jit_context.blend_color) + setup->fs.current.jit_context.blend_color = align_malloc(4 * 16, 16); for (i = 0; i < 4; ++i) { uint8_t c = float_to_ubyte(blend_color->color[i]); for (j = 0; j < 16; ++j) - setup->fs.jit_context.blend_color[i*4 + j] = c; + setup->fs.current.jit_context.blend_color[i*4 + j] = c; } - setup->fs.jit_context_dirty = TRUE; + setup->fs.dirty = TRUE; } void @@ -490,7 +490,8 @@ lp_setup_set_sampler_textures( struct setup_context *setup, if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); - struct lp_jit_texture *jit_tex = &setup->fs.jit_context.textures[i]; + struct lp_jit_texture *jit_tex; + jit_tex = &setup->fs.current.jit_context.textures[i]; jit_tex->width = tex->width[0]; jit_tex->height = tex->height[0]; jit_tex->stride = lp_tex->stride[0]; @@ -502,7 +503,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, } } - setup->fs.jit_context_dirty = TRUE; + setup->fs.dirty = TRUE; } boolean @@ -519,22 +520,28 @@ 
lp_setup_update_shader_state( struct setup_context *setup ) { SETUP_DEBUG("%s\n", __FUNCTION__); - if(setup->fs.jit_context_dirty) { - if(!setup->fs.last_jc || - memcmp(setup->fs.last_jc, &setup->fs.jit_context, sizeof *setup->fs.last_jc)) { - struct lp_jit_context *jc; - - jc = get_data(&setup->data, sizeof *jc); - if(jc) { - memcpy(jc, &setup->fs.jit_context, sizeof *jc); - setup->fs.last_jc = jc; + assert(setup->fs.current.jit_function); + + if(setup->fs.dirty) { + if(!setup->fs.stored || + memcmp(setup->fs.stored, + &setup->fs.current, + sizeof setup->fs.current) != 0) { + struct lp_rast_state *stored; + + stored = get_data(&setup->data, sizeof *stored); + if(stored) { + memcpy(stored, + &setup->fs.current, + sizeof setup->fs.current); + setup->fs.stored = stored; } } - setup->fs.jit_context_dirty = FALSE; + setup->fs.dirty = FALSE; } - assert(setup->fs.last_jc); + assert(setup->fs.stored); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 747e90fe202..c15a59e4d1e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -110,12 +110,9 @@ struct setup_context { struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; - struct lp_jit_context jit_context; - lp_jit_frag_func jit_function; - - boolean jit_context_dirty; - - const struct lp_jit_context *last_jc; + const struct lp_rast_state *stored; + struct lp_rast_state current; + boolean dirty; } fs; void (*point)( struct setup_context *, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 857fb6a9f88..78e53292ece 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -272,6 +272,8 @@ do_triangle_ccw(struct setup_context *setup, float c1, c2, c3; int minx, maxx, miny, maxy; + tri->inputs.state = setup->fs.stored; + tri->dx12 = x1 - x2; tri->dx23 = x2 - x3; tri->dx31 
= x3 - x1; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index a12581a4868..0541d36580c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -84,6 +84,7 @@ #include "lp_screen.h" #include "lp_context.h" #include "lp_buffer.h" +#include "lp_setup.h" #include "lp_state.h" #include "lp_tex_sample.h" #include "lp_debug.h" @@ -765,4 +766,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) variant = generate_fragment(lp, shader, &key); shader->current = variant; + + lp_setup_set_fs(lp->setup, shader); } -- cgit v1.2.3 From 82ec7f018d20e46e9c43ea467354dcfe4f03bae3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 16:05:26 +0100 Subject: llvmpipe: correct binning maths for iterating over whole tiles --- src/gallium/drivers/llvmpipe/lp_rast.c | 5 +++++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 12 ++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index cba50c80491..d4f369d4d02 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -192,6 +192,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { +#if 0 const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned masks[4] = {~0, ~0, ~0, ~0}; unsigned x, y; @@ -203,6 +204,10 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, for (y = 0; y < TILE_SIZE; y += 2) for (x = 0; x < TILE_SIZE; x += 8) lp_rast_shade_quads( rast, inputs, x, y, masks); +#else + RAST_DEBUG("%s\n", __FUNCTION__); + memset(rast->tile.color, 0x80, TILE_SIZE * TILE_SIZE * 4); +#endif } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 78e53292ece..c4379403817 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -357,13 +357,13 @@ do_triangle_ccw(struct setup_context *setup, c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; - /* Convert to tile coordinates: - */ minx /= TILESIZE; - maxx /= TILESIZE; miny /= TILESIZE; + maxx /= TILESIZE; maxy /= TILESIZE; - + + /* Convert to tile coordinates: + */ if (miny == maxy && minx == maxx) { /* Triangle is contained in a single tile: @@ -399,13 +399,13 @@ do_triangle_ccw(struct setup_context *setup, * Trivially accept or reject blocks, else jump to per-pixel * examination above. */ - for (y = miny; y < maxy; y++) + for (y = miny; y <= maxy; y++) { float cx1 = c1; float cx2 = c2; float cx3 = c3; - for (x = minx; x < maxx; x++) + for (x = minx; x <= maxx; x++) { if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || -- cgit v1.2.3 From b0cd386e777912df115858d90f2eec31811c2d9c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 16:18:03 +0100 Subject: llvmpipe: Tell setup shader inputs. 
--- src/gallium/drivers/llvmpipe/lp_state_derived.c | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 00903c8ef44..a18efcc0e0f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -144,6 +144,36 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) } draw_compute_vertex_size(vinfo); + + { + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; + + for (i = 0; i < lpfs->info.num_inputs; i++) { + switch (vinfo->attrib[i].interp_mode) { + case INTERP_CONSTANT: + inputs[i].interp = LP_INTERP_CONSTANT; + break; + case INTERP_LINEAR: + inputs[i].interp = LP_INTERP_LINEAR; + break; + case INTERP_PERSPECTIVE: + inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + case INTERP_POS: + inputs[i].interp = LP_INTERP_POSITION; + break; + default: + assert(0); + } + + if (lpfs->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) + inputs[i].interp = LP_INTERP_FACING; + + inputs[i].src_index = vinfo->attrib[i].src_index; + } + + lp_setup_set_fs_inputs(llvmpipe->setup, inputs, lpfs->info.num_inputs); + } } return vinfo; -- cgit v1.2.3 From c2e926b72de21bfac0048f32e1204537446d5ab0 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 16:18:19 +0100 Subject: llvmpipe: Pass framebuffer coords to shader. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d4f369d4d02..e73331535fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -250,7 +250,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /* run shader */ state->jit_function( &state->jit_context, - x, y, + rast->x + x, rast->y + y, inputs->a0, inputs->dadx, inputs->dady, -- cgit v1.2.3 From 05131f7502150968d7ee19673676f74d4c2fd22b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 16:19:00 +0100 Subject: llvmpipe: properly clip tile writes --- src/gallium/drivers/llvmpipe/lp_rast.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index e73331535fb..3585011ace0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -269,15 +269,23 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) { const unsigned x = rast->x; const unsigned y = rast->y; + unsigned w = TILESIZE; + unsigned h = TILESIZE; - RAST_DEBUG("%s %d,%d\n", __FUNCTION__, x, y); + if (x + w > rast->width) + w -= x + w - rast->width; + + if (y + h > rast->height) + h -= y + h - rast->height; + + RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); lp_tile_write_4ub(rast->cbuf_transfer->format, rast->tile.color, rast->cbuf_map, rast->cbuf_transfer->stride, x, y, - TILESIZE, TILESIZE); + w, h); } -- cgit v1.2.3 From 8c34c86d191fc703670d4e1e1ae4719cb39f8828 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 16:21:25 +0100 Subject: llvmpipe: Undo debug override. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3585011ace0..38c3aea921c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -192,7 +192,6 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { -#if 0 const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned masks[4] = {~0, ~0, ~0, ~0}; unsigned x, y; @@ -204,10 +203,6 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, for (y = 0; y < TILE_SIZE; y += 2) for (x = 0; x < TILE_SIZE; x += 8) lp_rast_shade_quads( rast, inputs, x, y, masks); -#else - RAST_DEBUG("%s\n", __FUNCTION__); - memset(rast->tile.color, 0x80, TILE_SIZE * TILE_SIZE * 4); -#endif } -- cgit v1.2.3 From 6464ec48366fee201c61a481c3205a64279797b2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 16:50:32 +0100 Subject: llvmpipe: Remove partial tile override. 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index c4379403817..3cb7a286049 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -423,15 +423,10 @@ do_triangle_ccw(struct setup_context *setup, } else { -#if 1 - bin_command( &setup->tile[x][y], lp_rast_shade_tile, - lp_rast_arg_inputs(&tri->inputs) ); -#else /* shade partial tile */ bin_command( &setup->tile[x][y], lp_rast_triangle, lp_rast_arg_triangle(tri) ); -#endif } /* Iterate cx values across the region: -- cgit v1.2.3 From b4924d62c7346da2e0de9ae4f9f23b3fb7fafee8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 16:59:24 +0100 Subject: llvmpipe: fill in tri min/max values --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 3cb7a286049..1725614902b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -306,6 +306,11 @@ do_triangle_ccw(struct setup_context *setup, if (miny == maxy || minx == maxx) return; + tri->miny = miny; + tri->minx = minx; + tri->maxy = maxy; + tri->maxx = maxx; + /* The only divide in this code. Is it really needed? */ tri->oneoverarea = 1.0f / area; -- cgit v1.2.3 From 8c80413360855106734068066382be8c3a46a64f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 17:14:11 +0100 Subject: llvmpipe: Fix typo in tri bounding box check.
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 1041cd2463c..6b5bee4af3c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -179,7 +179,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, /* Clamp to tile dimensions: */ - int minx = MAX2(tri->maxx, rast->x); + int minx = MAX2(tri->minx, rast->x); int miny = MAX2(tri->miny, rast->y); int maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); -- cgit v1.2.3 From 61f3eeb6403e404d297bdcd924c215ed36060945 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 19:16:36 +0100 Subject: llvmpipe: Use framebuffer coords consistently. --- src/gallium/drivers/llvmpipe/lp_rast.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 38c3aea921c..2038403c8fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -202,7 +202,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 2) for (x = 0; x < TILE_SIZE; x += 8) - lp_rast_shade_quads( rast, inputs, x, y, masks); + lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, masks); } @@ -211,6 +211,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned x, unsigned y, const unsigned *masks) { +#if 1 const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; @@ -218,23 +219,27 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; + unsigned ix, iy; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % 
TILE_VECTOR_HEIGHT == 0); + ix = x % TILE_SIZE; + iy = y % TILE_SIZE; + /* mask */ for (q = 0; q < 4; ++q) for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) mask[q][chan_index] = masks[q] & (1 << chan_index) ? ~0 : 0; /* color buffer */ - color = &TILE_PIXEL(tile->color, x, y, 0); + color = &TILE_PIXEL(tile->color, ix, iy, 0); /* depth buffer */ assert((x % 2) == 0); assert((y % 2) == 0); - depth = tile->depth + y*TILE_SIZE + 2*x; + depth = tile->depth + iy*TILE_SIZE + 2*ix; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(mask, 16)); @@ -245,14 +250,30 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /* run shader */ state->jit_function( &state->jit_context, - rast->x + x, rast->y + y, + x, y, inputs->a0, inputs->dadx, inputs->dady, &mask[0][0], color, depth); +#else + struct lp_rast_tile *tile = &rast->tile; + unsigned chan_index; + unsigned q, ix, iy; + + x %= TILE_SIZE; + y %= TILE_SIZE; + + /* mask */ + for (q = 0; q < 4; ++q) + for(iy = 0; iy < 2; ++iy) + for(ix = 0; ix < 2; ++ix) + if(masks[q] & (1 << (iy*2 + ix))) + for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) + TILE_PIXEL(tile->color, x + q*2 + ix, y + iy, chan_index) = 0xff; +#endif } -- cgit v1.2.3 From 7908c239e0fdc11d878b8c68d126c3364af0ee24 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 19:17:30 +0100 Subject: llvmpipe: Additional checks for binner block lists. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 428d2d00850..a74756de7ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -101,7 +101,7 @@ static void reset_context( struct setup_context *setup ) SETUP_DEBUG("%s\n", __FUNCTION__); - /* Free binner command lists: + /* Free all but last binner command lists: */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { @@ -114,12 +114,13 @@ static void reset_context( struct setup_context *setup ) FREE(block); } + assert(list->tail->next == NULL); list->head = list->tail; list->head->count = 0; } } - /* Free binned data: + /* Free all but last binned data block: */ { struct data_block_list *list = &setup->data; @@ -130,6 +131,7 @@ static void reset_context( struct setup_context *setup ) FREE(block); } + assert(list->tail->next == NULL); list->head = list->tail; list->head->used = 0; } @@ -588,6 +590,8 @@ lp_setup_destroy( struct setup_context *setup ) for (j = 0; j < TILES_Y; j++) FREE(setup->tile[i][j].head); + FREE(setup->data.head); + lp_rast_destroy( setup->rast ); FREE( setup ); } -- cgit v1.2.3 From 0177c6e66cfddeb62feca86e7bd5ae763b9b5244 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 10 Oct 2009 18:44:46 +0100 Subject: llvmpipe: Only invoke the shader if necessary. 
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 6b5bee4af3c..f096972d638 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -140,7 +140,8 @@ do_block( struct lp_rasterizer *rast, cx3 += xstep3; } - lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); + if(masks[0] || masks[1] || masks[2] || masks[3]) + lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); c1 += ystep1; c2 += ystep2; -- cgit v1.2.3 From 2e3580d994e2caf6d81763803c8525a7ed42b8fd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 18 Oct 2009 11:57:43 +0100 Subject: llvmpipe: Maintain a copy of the shader constants to prevent clobbering. --- src/gallium/drivers/llvmpipe/lp_setup.c | 64 +++++++++++++++++++------ src/gallium/drivers/llvmpipe/lp_setup_context.h | 13 ++++- 2 files changed, 61 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index a74756de7ca..08dac459db3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -101,6 +101,11 @@ static void reset_context( struct setup_context *setup ) SETUP_DEBUG("%s\n", __FUNCTION__); + /* Reset derived data */ + pipe_buffer_reference(&setup->constants.current, NULL); + setup->constants.stored_size = 0; + setup->constants.stored_data = NULL; + /* Free all but last binner command lists: */ for (i = 0; i < setup->tiles_x; i++) { @@ -424,18 +429,11 @@ void lp_setup_set_fs_constants(struct setup_context *setup, struct pipe_buffer *buffer) { - const void *data = buffer ? 
llvmpipe_buffer(buffer)->data : NULL; - struct pipe_buffer *dummy; - SETUP_DEBUG("%s\n", __FUNCTION__); - /* FIXME: hold on to the reference */ - dummy = NULL; - pipe_buffer_reference(&dummy, buffer); + pipe_buffer_reference(&setup->constants.current, buffer); - setup->fs.current.jit_context.constants = data; - - setup->fs.dirty = TRUE; + setup->dirty |= LP_SETUP_NEW_CONSTANTS; } @@ -447,7 +445,7 @@ lp_setup_set_alpha_ref_value( struct setup_context *setup, if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; - setup->fs.dirty = TRUE; + setup->dirty |= LP_SETUP_NEW_FS; } } @@ -468,7 +466,7 @@ lp_setup_set_blend_color( struct setup_context *setup, setup->fs.current.jit_context.blend_color[i*4 + j] = c; } - setup->fs.dirty = TRUE; + setup->dirty |= LP_SETUP_NEW_FS; } void @@ -505,7 +503,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, } } - setup->fs.dirty = TRUE; + setup->dirty |= LP_SETUP_NEW_FS; } boolean @@ -524,7 +522,43 @@ lp_setup_update_shader_state( struct setup_context *setup ) assert(setup->fs.current.jit_function); - if(setup->fs.dirty) { + if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { + struct pipe_buffer *buffer = setup->constants.current; + + if(buffer) { + unsigned current_size = buffer->size; + const void *current_data = llvmpipe_buffer(buffer)->data; + + /* TODO: copy only the actually used constants? 
*/ + + if(setup->constants.stored_size != current_size || + !setup->constants.stored_data || + memcmp(setup->constants.stored_data, + current_data, + current_size) != 0) { + void *stored; + + stored = get_data(&setup->data, current_size); + if(stored) { + memcpy(stored, + current_data, + current_size); + setup->constants.stored_size = current_size; + setup->constants.stored_data = stored; + } + } + } + else { + setup->constants.stored_size = 0; + setup->constants.stored_data = NULL; + } + + setup->fs.current.jit_context.constants = setup->constants.stored_data; + setup->dirty |= LP_SETUP_NEW_FS; + } + + + if(setup->dirty & LP_SETUP_NEW_FS) { if(!setup->fs.stored || memcmp(setup->fs.stored, &setup->fs.current, @@ -539,10 +573,10 @@ lp_setup_update_shader_state( struct setup_context *setup ) setup->fs.stored = stored; } } - - setup->fs.dirty = FALSE; } + setup->dirty = 0; + assert(setup->fs.stored); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index c15a59e4d1e..82ec71f1000 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -43,6 +43,10 @@ #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) +#define LP_SETUP_NEW_FS 0x01 +#define LP_SETUP_NEW_CONSTANTS 0x02 + + /* switch to a non-pointer value for this: */ typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); @@ -112,9 +116,16 @@ struct setup_context { const struct lp_rast_state *stored; struct lp_rast_state current; - boolean dirty; } fs; + struct { + struct pipe_buffer *current; + unsigned stored_size; + const void *stored_data; + } constants; + + unsigned dirty; + void (*point)( struct setup_context *, const float (*v0)[4]); -- cgit v1.2.3 From d9f44abe3bb0c9897937ef7f343a7896a0b4cbf0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 11:23:43 +0100 Subject: llvmpipe: fix typo correcting for fill convention 
Adjustments for top-left fill convention were being lost. --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 1725614902b..961bd103a7b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -269,8 +269,8 @@ do_triangle_ccw(struct setup_context *setup, struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); float area; - float c1, c2, c3; int minx, maxx, miny, maxy; + float c1, c2, c3; tri->inputs.state = setup->fs.stored; @@ -328,9 +328,9 @@ do_triangle_ccw(struct setup_context *setup, /* correct for top-left fill convention: */ - if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) c1++; - if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) c2++; - if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) c3++; + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to -- cgit v1.2.3 From 269342d916fff3bf0fa0a5c1f26aec30b62ed352 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 11:29:05 +0100 Subject: llvmpipe: correctly scale top/left fill adjustments Was overdoing it previously. 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 961bd103a7b..89b2b4eb37c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -328,9 +328,9 @@ do_triangle_ccw(struct setup_context *setup, /* correct for top-left fill convention: */ - if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; - if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; - if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1 += 1.0/16.0f; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2 += 1.0/16.0f; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3 += 1.0/16.0f; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to -- cgit v1.2.3 From f2be08ae0e20b3da8ff684ffeb94412cc6a5a5a1 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 19 Oct 2009 11:53:22 +0100 Subject: llvmpipe: Allocate the blend color from the data store, and ensure it's aligned. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 34 +++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 28 ++++++++++++++++++-- 2 files changed, 50 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 08dac459db3..da5a68cd40d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -105,6 +105,7 @@ static void reset_context( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); setup->constants.stored_size = 0; setup->constants.stored_data = NULL; + setup->dirty = ~0; /* Free all but last binner command lists: */ @@ -453,20 +454,14 @@ void lp_setup_set_blend_color( struct setup_context *setup, const struct pipe_blend_color *blend_color ) { - unsigned i, j; - SETUP_DEBUG("%s\n", __FUNCTION__); - if(!setup->fs.current.jit_context.blend_color) - setup->fs.current.jit_context.blend_color = align_malloc(4 * 16, 16); + assert(blend_color); - for (i = 0; i < 4; ++i) { - uint8_t c = float_to_ubyte(blend_color->color[i]); - for (j = 0; j < 16; ++j) - setup->fs.current.jit_context.blend_color[i*4 + j] = c; + if(memcmp(&setup->blend_color.current, blend_color, sizeof *blend_color) != 0) { + memcpy(&setup->blend_color.current, blend_color, sizeof *blend_color); + setup->dirty |= LP_SETUP_NEW_BLEND_COLOR; } - - setup->dirty |= LP_SETUP_NEW_FS; } void @@ -522,6 +517,25 @@ lp_setup_update_shader_state( struct setup_context *setup ) assert(setup->fs.current.jit_function); + if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) { + uint8_t *stored; + unsigned i, j; + + stored = get_data_aligned(&setup->data, 4 * 16, 16); + + for (i = 0; i < 4; ++i) { + uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); + for (j = 0; j < 16; ++j) + stored[i*4 + j] = c; + } + + setup->blend_color.stored = stored; + + setup->fs.current.jit_context.blend_color = setup->blend_color.stored; + setup->dirty |= 
LP_SETUP_NEW_FS; + } + + if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { struct pipe_buffer *buffer = setup->constants.current; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 82ec71f1000..bcd3b9b7aa3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -43,8 +43,9 @@ #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) -#define LP_SETUP_NEW_FS 0x01 -#define LP_SETUP_NEW_CONSTANTS 0x02 +#define LP_SETUP_NEW_FS 0x01 +#define LP_SETUP_NEW_CONSTANTS 0x02 +#define LP_SETUP_NEW_BLEND_COLOR 0x04 /* switch to a non-pointer value for this: @@ -124,6 +125,11 @@ struct setup_context { const void *stored_data; } constants; + struct { + struct pipe_blend_color current; + uint8_t *stored; + } blend_color; + unsigned dirty; void (*point)( struct setup_context *, @@ -163,6 +169,24 @@ static INLINE void *get_data( struct data_block_list *list, } } +static INLINE void *get_data_aligned( struct data_block_list *list, + unsigned size, + unsigned alignment ) +{ + + if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { + lp_setup_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; + tail->used += offset + size; + return data + offset; + } +} + /* Add a command to a given bin. */ static INLINE void bin_command( struct cmd_block_list *list, -- cgit v1.2.3 From 301c1494b27ad92ff1237909f9c98c1660be8fc1 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 19 Oct 2009 13:14:33 +0100 Subject: llvmpipe: Reset the pointer to stored jit context. 
--- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index da5a68cd40d..c0f516e12c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -101,10 +101,10 @@ static void reset_context( struct setup_context *setup ) SETUP_DEBUG("%s\n", __FUNCTION__); - /* Reset derived data */ - pipe_buffer_reference(&setup->constants.current, NULL); + /* Reset derived state */ setup->constants.stored_size = 0; setup->constants.stored_data = NULL; + setup->fs.stored = NULL; setup->dirty = ~0; /* Free all but last binner command lists: @@ -634,6 +634,8 @@ lp_setup_destroy( struct setup_context *setup ) reset_context( setup ); + pipe_buffer_reference(&setup->constants.current, NULL); + for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) FREE(setup->tile[i][j].head); @@ -671,6 +673,8 @@ lp_setup_create( struct pipe_screen *screen ) setup->line = first_line; setup->point = first_point; + setup->dirty = ~0; + return setup; fail: -- cgit v1.2.3 From 0580079864c41c236a4167a1543b1a2fc5090362 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 12:24:18 +0100 Subject: llvmpipe: fixed-point rasterization --- src/gallium/drivers/llvmpipe/lp_rast.h | 35 +++--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 88 +++++++------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 9 ++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 145 ++++++++++++------------ 4 files changed, 147 insertions(+), 130 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index f371b709df3..97250071199 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -39,7 +39,10 @@ struct lp_rasterizer; struct pipe_screen; -#define TILESIZE 64 +#define FIXED_ORDER 4 +#define FIXED_ONE 
(1<<FIXED_ORDER) [... remainder of the lp_rast.h diff and the beginning of the lp_rast_tri.c diff were lost in extraction ...] - float xstep1 = -tri->dy12; - float xstep2 = -tri->dy23; - float xstep3 = -tri->dy31; + const int xstep1 = -tri->dy12 * FIXED_ONE; + const int xstep2 = -tri->dy23 * FIXED_ONE; + const int xstep3 = -tri->dy31 * FIXED_ONE; - float ystep1 = tri->dx12; - float ystep2 = tri->dx23; - float ystep3 = tri->dx31; + const int ystep1 = tri->dx12 * FIXED_ONE; + const int ystep2 = tri->dx23 * FIXED_ONE; + const int ystep3 = tri->dx31 * FIXED_ONE; unsigned mask = 0; @@ -108,26 +108,26 @@ static void do_block( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y, - float c1, - float c2, - float c3 ) + int c1, + int c2, + int c3 ) { - const int step = 2; + const int step = 2 * FIXED_ONE; - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; + const int xstep1 = -step * tri->dy12; + const int xstep2 = -step * tri->dy23; + const int xstep3 = -step * tri->dy31; - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; + const int ystep1 = step * tri->dx12; + const int ystep2 = step * tri->dx23; + const int ystep3 = step * tri->dx31; int ix, iy; for (iy = 0; iy < BLOCKSIZE; iy += 2) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; + int cx1 = c1; + int cx2 = c2; + int cx3 = c3; unsigned masks[4] = {0, 0, 0, 0}; @@ -160,23 +160,23 @@ void lp_rast_triangle( struct lp_rasterizer *rast, { const struct lp_rast_triangle *tri = arg.triangle; - const int step = BLOCKSIZE; + const int step = BLOCKSIZE * FIXED_ONE; - float ei1 = tri->ei1 * step; - float ei2 = tri->ei2 * step; - float ei3 = tri->ei3 * step; + int ei1 = tri->ei1 * step; + int ei2 = tri->ei2 * step; + int ei3 = tri->ei3 * step; - float eo1 = tri->eo1 * step; - float eo2 = tri->eo2 * step; - float eo3 = tri->eo3 * step; + int eo1 = tri->eo1 * step; + int eo2 = tri->eo2 * step; + int eo3 = tri->eo3 * step; - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; + int
xstep1 = -step * tri->dy12; + int xstep2 = -step * tri->dy23; + int xstep3 = -step * tri->dy31; - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; + int ystep1 = step * tri->dx12; + int ystep2 = step * tri->dx23; + int ystep3 = step * tri->dx31; /* Clamp to tile dimensions: */ @@ -186,8 +186,8 @@ void lp_rast_triangle( struct lp_rasterizer *rast, int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); int x, y; - float x0, y0; - float c1, c2, c3; + int x0, y0; + int c1, c2, c3; debug_printf("%s\n", __FUNCTION__); @@ -196,23 +196,23 @@ void lp_rast_triangle( struct lp_rasterizer *rast, return; } - minx &= ~(step-1); - miny &= ~(step-1); + minx &= ~(BLOCKSIZE-1); + miny &= ~(BLOCKSIZE-1); - x0 = (float)minx; - y0 = (float)miny; + x0 = minx << FIXED_ORDER; + y0 = miny << FIXED_ORDER; c1 = tri->c1 + tri->dx12 * y0 - tri->dy12 * x0; c2 = tri->c2 + tri->dx23 * y0 - tri->dy23 * x0; c3 = tri->c3 + tri->dx31 * y0 - tri->dy31 * x0; - for (y = miny; y < maxy; y += step) + for (y = miny; y < maxy; y += BLOCKSIZE) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; + int cx1 = c1; + int cx2 = c2; + int cx3 = c3; - for (x = minx; x < maxx; x += step) + for (x = minx; x < maxx; x += BLOCKSIZE) { if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index bcd3b9b7aa3..d91ffc7c20a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -169,6 +169,15 @@ static INLINE void *get_data( struct data_block_list *list, } } +/* Put back data if we decide not to use it, eg. culled triangles. 
+ */ +static INLINE void putback_data( struct data_block_list *list, + unsigned size) +{ + list->tail->used -= size; +} + + static INLINE void *get_data_aligned( struct data_block_list *list, unsigned size, unsigned alignment ) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 89b2b4eb37c..44386a225d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -223,10 +223,9 @@ static void setup_tri_coefficients( struct setup_context *setup, /* XXX: do this by add/subtracting a large floating point number: */ -static inline float subpixel_snap( float a ) +static inline int subpixel_snap( float a ) { - int i = a * 16; - return (float)i * (1.0/16); + return util_iround(FIXED_ONE * a); } @@ -256,23 +255,18 @@ do_triangle_ccw(struct setup_context *setup, const float (*v3)[4], boolean frontfacing ) { - const int rt_width = setup->fb.width; - const int rt_height = setup->fb.height; - const float y1 = subpixel_snap(v1[0][1]); - const float y2 = subpixel_snap(v2[0][1]); - const float y3 = subpixel_snap(v3[0][1]); + const int y1 = subpixel_snap(v1[0][1]); + const int y2 = subpixel_snap(v2[0][1]); + const int y3 = subpixel_snap(v3[0][1]); - const float x1 = subpixel_snap(v1[0][0]); - const float x2 = subpixel_snap(v2[0][0]); - const float x3 = subpixel_snap(v3[0][0]); + const int x1 = subpixel_snap(v1[0][0]); + const int x2 = subpixel_snap(v2[0][0]); + const int x3 = subpixel_snap(v3[0][0]); struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); float area; int minx, maxx, miny, maxy; - float c1, c2, c3; - - tri->inputs.state = setup->fs.stored; tri->dx12 = x1 - x2; tri->dx23 = x2 - x3; @@ -285,35 +279,32 @@ do_triangle_ccw(struct setup_context *setup, area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12); - /* Cull non-ccw and zero-sized triangles. + /* Cull non-ccw and zero-sized triangles. + * + * XXX: subject to overflow?? 
*/ - if (area <= 0 || util_is_inf_or_nan(area)) + if (area <= 0) { + putback_data( &setup->data, sizeof *tri ); return; + } // Bounding rectangle - minx = util_iround(MIN3(x1, x2, x3) - .5); - maxx = util_iround(MAX3(x1, x2, x3) + .5); - miny = util_iround(MIN3(y1, y2, y3) - .5); - maxy = util_iround(MAX3(y1, y2, y3) + .5); + tri->minx = (MIN3(x1, x2, x3) + 0xf) >> FIXED_ORDER; + tri->maxx = (MAX3(x1, x2, x3) + 0xf) >> FIXED_ORDER; + tri->miny = (MIN3(y1, y2, y3) + 0xf) >> FIXED_ORDER; + tri->maxy = (MAX3(y1, y2, y3) + 0xf) >> FIXED_ORDER; - /* Clamp to framebuffer (or tile) dimensions: - */ - miny = MAX2(0, miny); - minx = MAX2(0, minx); - maxy = MIN2(rt_height, maxy); - maxx = MIN2(rt_width, maxx); - - if (miny == maxy || minx == maxx) + if (tri->miny == tri->maxy || + tri->minx == tri->maxx) { + putback_data( &setup->data, sizeof *tri ); return; + } - tri->miny = miny; - tri->minx = minx; - tri->maxy = maxy; - tri->maxx = maxx; + tri->inputs.state = setup->fs.stored; - /* The only divide in this code. Is it really needed? + /* */ - tri->oneoverarea = 1.0f / area; + tri->oneoverarea = ((float)FIXED_ONE) / (float)area; /* Setup parameter interpolants: */ @@ -328,9 +319,9 @@ do_triangle_ccw(struct setup_context *setup, /* correct for top-left fill convention: */ - if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1 += 1.0/16.0f; - if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2 += 1.0/16.0f; - if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3 += 1.0/16.0f; + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; /* find trivial reject offsets for each edge for a single-pixel * sized block. 
These will be scaled up at each recursive level to @@ -355,17 +346,10 @@ do_triangle_ccw(struct setup_context *setup, tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; - minx &= ~(TILESIZE-1); /* aligned blocks */ - miny &= ~(TILESIZE-1); /* aligned blocks */ - - c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx; - c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; - c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; - - minx /= TILESIZE; - miny /= TILESIZE; - maxx /= TILESIZE; - maxy /= TILESIZE; + minx = tri->minx / TILESIZE; + miny = tri->miny / TILESIZE; + maxx = tri->maxx / TILESIZE; + maxy = tri->maxy / TILESIZE; /* Convert to tile coordinates: */ @@ -378,23 +362,31 @@ do_triangle_ccw(struct setup_context *setup, } else { - const int step = TILESIZE; - - float ei1 = tri->ei1 * step; - float ei2 = tri->ei2 * step; - float ei3 = tri->ei3 * step; - - float eo1 = tri->eo1 * step; - float eo2 = tri->eo2 * step; - float eo3 = tri->eo3 * step; - - float xstep1 = -step * tri->dy12; - float xstep2 = -step * tri->dy23; - float xstep3 = -step * tri->dy31; - - float ystep1 = step * tri->dx12; - float ystep2 = step * tri->dx23; - float ystep3 = step * tri->dx31; + int c1 = (tri->c1 + + tri->dx12 * miny * TILESIZE * FIXED_ONE - + tri->dy12 * minx * TILESIZE * FIXED_ONE); + int c2 = (tri->c2 + + tri->dx23 * miny * TILESIZE * FIXED_ONE - + tri->dy23 * minx * TILESIZE * FIXED_ONE); + int c3 = (tri->c3 + + tri->dx31 * miny * TILESIZE * FIXED_ONE - + tri->dy31 * minx * TILESIZE * FIXED_ONE); + + int ei1 = tri->ei1 << (FIXED_ORDER + TILE_ORDER); + int ei2 = tri->ei2 << (FIXED_ORDER + TILE_ORDER); + int ei3 = tri->ei3 << (FIXED_ORDER + TILE_ORDER); + + int eo1 = tri->eo1 << (FIXED_ORDER + TILE_ORDER); + int eo2 = tri->eo2 << (FIXED_ORDER + TILE_ORDER); + int eo3 = tri->eo3 << (FIXED_ORDER + TILE_ORDER); + + int xstep1 = -(tri->dy12 << (FIXED_ORDER + TILE_ORDER)); + int xstep2 = -(tri->dy23 << (FIXED_ORDER + TILE_ORDER)); + int xstep3 = 
-(tri->dy31 << (FIXED_ORDER + TILE_ORDER)); + + int ystep1 = tri->dx12 << (FIXED_ORDER + TILE_ORDER); + int ystep2 = tri->dx23 << (FIXED_ORDER + TILE_ORDER); + int ystep3 = tri->dx31 << (FIXED_ORDER + TILE_ORDER); int x, y; @@ -406,12 +398,25 @@ do_triangle_ccw(struct setup_context *setup, */ for (y = miny; y <= maxy; y++) { - float cx1 = c1; - float cx2 = c2; - float cx3 = c3; + int cx1 = c1; + int cx2 = c2; + int cx3 = c3; for (x = minx; x <= maxx; x++) { + assert(cx1 == + tri->c1 + + tri->dx12 * y * TILESIZE * FIXED_ONE - + tri->dy12 * x * TILESIZE * FIXED_ONE); + assert(cx2 == + tri->c2 + + tri->dx23 * y * TILESIZE * FIXED_ONE - + tri->dy23 * x * TILESIZE * FIXED_ONE); + assert(cx3 == + tri->c3 + + tri->dx31 * y * TILESIZE * FIXED_ONE - + tri->dy31 * x * TILESIZE * FIXED_ONE); + if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || cx3 + eo3 < 0) @@ -427,9 +432,9 @@ do_triangle_ccw(struct setup_context *setup, lp_rast_arg_inputs(&tri->inputs) ); } else - { + { /* shade partial tile */ - bin_command( &setup->tile[x][y], + bin_command( &setup->tile[x][y], lp_rast_triangle, lp_rast_arg_triangle(tri) ); } -- cgit v1.2.3 From 2f5f357c5b67869e75087fc1f17ed0d666fb134e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 19 Oct 2009 14:02:01 +0100 Subject: llvmpipe: Reshape the shader input from 8x2 to 4x4. Incorrect rendering until the interpolation code generation is updated. 
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 19 ++++++++----------- src/gallium/drivers/llvmpipe/lp_tile_soa.h | 4 ++-- src/gallium/drivers/llvmpipe/lp_tile_soa.py | 6 ++++-- 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index f9a8be20c70..c63aa22198b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -34,7 +34,7 @@ #include "lp_tile_soa.h" -#define BLOCKSIZE 8 +#define BLOCKSIZE 4 /* Convert 8x8 block into four runs of quads and render each in turn. @@ -55,11 +55,9 @@ static void block_full( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { - const unsigned masks[4] = {~0, ~0, 0, 0}; /* FIXME: Wasting quads!!! */ - int iy; + const unsigned masks[4] = {~0, ~0, ~0, ~0}; - for (iy = 0; iy < 4; iy += 2) - lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, &tri->inputs, x, y, masks); } #endif @@ -124,30 +122,29 @@ do_block( struct lp_rasterizer *rast, int ix, iy; + unsigned masks[2][2] = {{0, 0}, {0, 0}}; + for (iy = 0; iy < BLOCKSIZE; iy += 2) { int cx1 = c1; int cx2 = c2; int cx3 = c3; - unsigned masks[4] = {0, 0, 0, 0}; - for (ix = 0; ix < BLOCKSIZE; ix += 2) { - masks[ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3); + masks[iy >> 1][ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3); cx1 += xstep1; cx2 += xstep2; cx3 += xstep3; } - if(masks[0] || masks[1] || masks[2] || masks[3]) - lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); - c1 += ystep1; c2 += ystep2; c3 += ystep3; } + if(masks[0][0] || masks[0][1] || masks[1][0] || masks[1][1]) + lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 040b01865dd..d72d6d2ef15 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ 
b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -46,8 +46,8 @@ struct pipe_transfer; #define TILE_SIZE 64 -#define TILE_VECTOR_HEIGHT 2 -#define TILE_VECTOR_WIDTH 8 +#define TILE_VECTOR_HEIGHT 4 +#define TILE_VECTOR_WIDTH 4 extern const unsigned char tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 004c5c979e3..a603b7f9f42 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -259,8 +259,10 @@ def main(): print print 'const unsigned char' print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' - print ' { 0, 1, 4, 5, 8, 9, 12, 13},' - print ' { 2, 3, 6, 7, 10, 11, 14, 15}' + print ' { 0, 1, 4, 5},' + print ' { 2, 3, 6, 7},' + print ' { 8, 9, 12, 13},' + print ' { 10, 11, 14, 15}' print '};' print -- cgit v1.2.3 From 3fd6b724cc406573cf53684cd72fa7f60b65354a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 14:55:05 +0100 Subject: llvmpipe: pre-multiply some constants by fixed_one --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 26 ++++++-------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 56 ++++++++++++++++------------- 2 files changed, 43 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index c63aa22198b..17ebce4c85e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -66,13 +66,13 @@ do_quad( const struct lp_rast_triangle *tri, int x, int y, int c1, int c2, int c3 ) { - const int xstep1 = -tri->dy12 * FIXED_ONE; - const int xstep2 = -tri->dy23 * FIXED_ONE; - const int xstep3 = -tri->dy31 * FIXED_ONE; + const int xstep1 = -tri->dy12 ; + const int xstep2 = -tri->dy23 ; + const int xstep3 = -tri->dy31 ; - const int ystep1 = tri->dx12 * FIXED_ONE; - const int ystep2 = tri->dx23 * FIXED_ONE; - const int ystep3 = 
tri->dx31 * FIXED_ONE; + const int ystep1 = tri->dx12 ; + const int ystep2 = tri->dx23 ; + const int ystep3 = tri->dx31 ; unsigned mask = 0; @@ -110,7 +110,7 @@ do_block( struct lp_rasterizer *rast, int c2, int c3 ) { - const int step = 2 * FIXED_ONE; + const int step = 2 ; const int xstep1 = -step * tri->dy12; const int xstep2 = -step * tri->dy23; @@ -157,7 +157,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, { const struct lp_rast_triangle *tri = arg.triangle; - const int step = BLOCKSIZE * FIXED_ONE; + const int step = BLOCKSIZE; int ei1 = tri->ei1 * step; int ei2 = tri->ei2 * step; @@ -183,7 +183,6 @@ void lp_rast_triangle( struct lp_rasterizer *rast, int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); int x, y; - int x0, y0; int c1, c2, c3; debug_printf("%s\n", __FUNCTION__); @@ -196,12 +195,9 @@ void lp_rast_triangle( struct lp_rasterizer *rast, minx &= ~(BLOCKSIZE-1); miny &= ~(BLOCKSIZE-1); - x0 = minx << FIXED_ORDER; - y0 = miny << FIXED_ORDER; - - c1 = tri->c1 + tri->dx12 * y0 - tri->dy12 * x0; - c2 = tri->c2 + tri->dx23 * y0 - tri->dy23 * x0; - c3 = tri->c3 + tri->dx31 * y0 - tri->dy31 * x0; + c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx; + c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; + c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; for (y = miny; y < maxy; y += BLOCKSIZE) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 44386a225d1..6c9f75e90ce 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -323,6 +323,14 @@ do_triangle_ccw(struct setup_context *setup, if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; + tri->dy12 *= FIXED_ONE; + tri->dy23 *= FIXED_ONE; + tri->dy31 *= FIXED_ONE; + + tri->dx12 *= FIXED_ONE; + tri->dx23 *= FIXED_ONE; + tri->dx31 *= FIXED_ONE; + /* find trivial reject offsets for each edge for a single-pixel * 
sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if @@ -363,30 +371,30 @@ do_triangle_ccw(struct setup_context *setup, else { int c1 = (tri->c1 + - tri->dx12 * miny * TILESIZE * FIXED_ONE - - tri->dy12 * minx * TILESIZE * FIXED_ONE); + tri->dx12 * miny * TILESIZE - + tri->dy12 * minx * TILESIZE); int c2 = (tri->c2 + - tri->dx23 * miny * TILESIZE * FIXED_ONE - - tri->dy23 * minx * TILESIZE * FIXED_ONE); + tri->dx23 * miny * TILESIZE - + tri->dy23 * minx * TILESIZE); int c3 = (tri->c3 + - tri->dx31 * miny * TILESIZE * FIXED_ONE - - tri->dy31 * minx * TILESIZE * FIXED_ONE); + tri->dx31 * miny * TILESIZE - + tri->dy31 * minx * TILESIZE); - int ei1 = tri->ei1 << (FIXED_ORDER + TILE_ORDER); - int ei2 = tri->ei2 << (FIXED_ORDER + TILE_ORDER); - int ei3 = tri->ei3 << (FIXED_ORDER + TILE_ORDER); + int ei1 = tri->ei1 << TILE_ORDER; + int ei2 = tri->ei2 << TILE_ORDER; + int ei3 = tri->ei3 << TILE_ORDER; - int eo1 = tri->eo1 << (FIXED_ORDER + TILE_ORDER); - int eo2 = tri->eo2 << (FIXED_ORDER + TILE_ORDER); - int eo3 = tri->eo3 << (FIXED_ORDER + TILE_ORDER); + int eo1 = tri->eo1 << TILE_ORDER; + int eo2 = tri->eo2 << TILE_ORDER; + int eo3 = tri->eo3 << TILE_ORDER; - int xstep1 = -(tri->dy12 << (FIXED_ORDER + TILE_ORDER)); - int xstep2 = -(tri->dy23 << (FIXED_ORDER + TILE_ORDER)); - int xstep3 = -(tri->dy31 << (FIXED_ORDER + TILE_ORDER)); + int xstep1 = -(tri->dy12 << TILE_ORDER); + int xstep2 = -(tri->dy23 << TILE_ORDER); + int xstep3 = -(tri->dy31 << TILE_ORDER); - int ystep1 = tri->dx12 << (FIXED_ORDER + TILE_ORDER); - int ystep2 = tri->dx23 << (FIXED_ORDER + TILE_ORDER); - int ystep3 = tri->dx31 << (FIXED_ORDER + TILE_ORDER); + int ystep1 = tri->dx12 << TILE_ORDER; + int ystep2 = tri->dx23 << TILE_ORDER; + int ystep3 = tri->dx31 << TILE_ORDER; int x, y; @@ -406,16 +414,16 @@ do_triangle_ccw(struct setup_context *setup, { assert(cx1 == tri->c1 + - tri->dx12 * y * TILESIZE * FIXED_ONE - - tri->dy12 * x 
* TILESIZE * FIXED_ONE); + tri->dx12 * y * TILESIZE - + tri->dy12 * x * TILESIZE); assert(cx2 == tri->c2 + - tri->dx23 * y * TILESIZE * FIXED_ONE - - tri->dy23 * x * TILESIZE * FIXED_ONE); + tri->dx23 * y * TILESIZE - + tri->dy23 * x * TILESIZE); assert(cx3 == tri->c3 + - tri->dx31 * y * TILESIZE * FIXED_ONE - - tri->dy31 * x * TILESIZE * FIXED_ONE); + tri->dx31 * y * TILESIZE - + tri->dy31 * x * TILESIZE); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || -- cgit v1.2.3 From b0828b0adc7438ef33f9393f839226ef7dfda0dc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 16:41:27 +0100 Subject: llvmpipe: calculate masks in format desired by shader Also remove branches calculating masks for quads. --- src/gallium/drivers/llvmpipe/lp_rast.c | 21 ++++---- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 82 ++++++++++++----------------- 3 files changed, 44 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2038403c8fd..01f46dcab10 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -193,7 +193,12 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const unsigned masks[4] = {~0, ~0, ~0, ~0}; + static const uint32_t ALIGN16_ATTRIB masks[4][4] = + { {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0} }; + unsigned x, y; RAST_DEBUG("%s\n", __FUNCTION__); @@ -202,23 +207,20 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 2) for (x = 0; x < TILE_SIZE; x += 8) - lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, masks); + lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]); } void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, 
unsigned y, - const unsigned *masks) + const uint32_t *masks) { #if 1 const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; - unsigned chan_index; - unsigned q; unsigned ix, iy; /* Sanity checks */ @@ -228,11 +230,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, ix = x % TILE_SIZE; iy = y % TILE_SIZE; - /* mask */ - for (q = 0; q < 4; ++q) - for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - mask[q][chan_index] = masks[q] & (1 << chan_index) ? ~0 : 0; - /* color buffer */ color = &TILE_PIXEL(tile->color, ix, iy, 0); @@ -254,7 +251,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, inputs->a0, inputs->dadx, inputs->dady, - &mask[0][0], + masks, color, depth); #else diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 11e8e78e798..f438faaf36c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -80,6 +80,6 @@ struct lp_rasterizer { void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const unsigned *masks); + const uint32_t *masks); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 17ebce4c85e..5f22aca668a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,34 +37,26 @@ #define BLOCKSIZE 4 -/* Convert 8x8 block into four runs of quads and render each in turn. 
+/* Render a 4x4 unmasked block: */ -#if (BLOCKSIZE == 8) static void block_full( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { - const unsigned masks[4] = {~0, ~0, ~0, ~0}; - int iy; + static const uint32_t ALIGN16_ATTRIB masks[4][4] = + { {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0}, + {~0, ~0, ~0, ~0} }; - for (iy = 0; iy < 8; iy += 2) - lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks); + lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); } -#else -static void block_full( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, - int x, int y ) -{ - const unsigned masks[4] = {~0, ~0, ~0, ~0}; - lp_rast_shade_quads(rast, &tri->inputs, x, y, masks); -} -#endif -static INLINE unsigned +static INLINE void do_quad( const struct lp_rast_triangle *tri, - int x, int y, - int c1, int c2, int c3 ) + int c1, int c2, int c3, + int32_t *mask ) { const int xstep1 = -tri->dy12 ; const int xstep2 = -tri->dy23 ; @@ -73,30 +65,22 @@ do_quad( const struct lp_rast_triangle *tri, const int ystep1 = tri->dx12 ; const int ystep2 = tri->dx23 ; const int ystep3 = tri->dx31 ; - - unsigned mask = 0; - - if (c1 > 0 && - c2 > 0 && - c3 > 0) - mask |= 1; - - if (c1 + xstep1 > 0 && - c2 + xstep2 > 0 && - c3 + xstep3 > 0) - mask |= 2; - - if (c1 + ystep1 > 0 && - c2 + ystep2 > 0 && - c3 + ystep3 > 0) - mask |= 4; - - if (c1 + ystep1 + xstep1 > 0 && - c2 + ystep2 + xstep2 > 0 && - c3 + ystep3 + xstep3 > 0) - mask |= 8; - - return mask; + + mask[0] = ~(((c1) | + (c2) | + (c3)) >> 31); + + mask[1] = ~(((c1 + xstep1) | + (c2 + xstep2) | + (c3 + xstep3)) >> 31); + + mask[2] = ~(((c1 + ystep1) | + (c2 + ystep2) | + (c3 + ystep3)) >> 31); + + mask[3] = ~(((c1 + ystep1 + xstep1) | + (c2 + ystep2 + xstep2) | + (c3 + ystep3 + xstep3)) >> 31); } /* Evaluate each pixel in a block, generate a mask and possibly render @@ -121,17 +105,17 @@ do_block( struct lp_rasterizer *rast, const int ystep3 = step * tri->dx31; int ix, iy; + 
uint32_t ALIGN16_ATTRIB mask[4][4]; - unsigned masks[2][2] = {{0, 0}, {0, 0}}; - for (iy = 0; iy < BLOCKSIZE; iy += 2) { + for (iy = 0; iy < 4; iy += 2) { int cx1 = c1; int cx2 = c2; int cx3 = c3; - for (ix = 0; ix < BLOCKSIZE; ix += 2) { + for (ix = 0; ix < 2; ix ++) { - masks[iy >> 1][ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3); + do_quad(tri, cx1, cx2, cx3, (int32_t *)mask[iy+ix]); cx1 += xstep1; cx2 += xstep2; @@ -143,8 +127,10 @@ do_block( struct lp_rasterizer *rast, c3 += ystep3; } - if(masks[0][0] || masks[0][1] || masks[1][0] || masks[1][1]) - lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); + /* As we do trivial reject already, masks should rarely be all + * zero: + */ + lp_rast_shade_quads(rast, &tri->inputs, x, y, &mask[0][0] ); } -- cgit v1.2.3 From 5b07d4de38b732f99237161d940f40e3ce6e29c3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Oct 2009 17:10:48 +0100 Subject: llvmpipe: remove a leftover 8x2 usage --- src/gallium/drivers/llvmpipe/lp_rast.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 01f46dcab10..85b756e4535 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -203,10 +203,10 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, RAST_DEBUG("%s\n", __FUNCTION__); - /* Use the existing preference for 8x2 (four quads) shading: + /* Use the existing preference for 4x4 (four quads) shading: */ - for (y = 0; y < TILE_SIZE; y += 2) - for (x = 0; x < TILE_SIZE; x += 8) + for (y = 0; y < TILE_SIZE; y += 4) + for (x = 0; x < TILE_SIZE; x += 4) lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]); } @@ -239,7 +239,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, depth = tile->depth + iy*TILE_SIZE + 2*ix; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ - assert(lp_check_alignment(mask, 
16)); + assert(lp_check_alignment(masks, 16)); assert(lp_check_alignment(depth, 16)); assert(lp_check_alignment(color, 16)); -- cgit v1.2.3 From 7670628061c2a6ce0a1a787556b0e33a38fd3049 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 02:46:00 +0100 Subject: llvmpipe: precalculate some offsets --- src/gallium/drivers/llvmpipe/lp_rast.c | 20 ++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 2 + src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 80 +++++------------------------ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 26 ++++++++++ 5 files changed, 51 insertions(+), 79 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 85b756e4535..39fb8cdb6ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -193,12 +193,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - static const uint32_t ALIGN16_ATTRIB masks[4][4] = - { {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0} }; - + const unsigned mask = ~0; unsigned x, y; RAST_DEBUG("%s\n", __FUNCTION__); @@ -207,26 +202,31 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 4) for (x = 0; x < TILE_SIZE; x += 4) - lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]); + lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask); } void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const uint32_t *masks) + unsigned mask) { #if 1 const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; - unsigned ix, iy; + uint32_t ALIGN16_ATTRIB masks[16]; + unsigned ix, iy, i; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH 
== 0); assert(y % TILE_VECTOR_HEIGHT == 0); + /* mask */ + for (i = 0; i < 16; ++i) + masks[i] = mask & (1 << i) ? ~0 : 0; + ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -251,7 +251,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, inputs->a0, inputs->dadx, inputs->dady, - masks, + &masks[0], color, depth); #else diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 97250071199..318bf73b159 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -111,6 +111,8 @@ struct lp_rast_triangle { int c2; int c3; + int step[3][16]; + /* XXX: this is only used inside lp_setup_tri.c, don't really * need it here: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index f438faaf36c..2333729807e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -80,6 +80,6 @@ struct lp_rasterizer { void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const uint32_t *masks); + unsigned masks); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5f22aca668a..b5a3753a881 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,100 +37,44 @@ #define BLOCKSIZE 4 + /* Render a 4x4 unmasked block: */ static void block_full( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { - static const uint32_t ALIGN16_ATTRIB masks[4][4] = - { {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0} }; + unsigned mask = ~0; - lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask); } -static INLINE void -do_quad( const struct lp_rast_triangle *tri, - int c1, int c2, int c3, - int32_t *mask ) -{ - const int xstep1 = 
-tri->dy12 ; - const int xstep2 = -tri->dy23 ; - const int xstep3 = -tri->dy31 ; - - const int ystep1 = tri->dx12 ; - const int ystep2 = tri->dx23 ; - const int ystep3 = tri->dx31 ; - - mask[0] = ~(((c1) | - (c2) | - (c3)) >> 31); - - mask[1] = ~(((c1 + xstep1) | - (c2 + xstep2) | - (c3 + xstep3)) >> 31); - - mask[2] = ~(((c1 + ystep1) | - (c2 + ystep2) | - (c3 + ystep3)) >> 31); - - mask[3] = ~(((c1 + ystep1 + xstep1) | - (c2 + ystep2 + xstep2) | - (c3 + ystep3 + xstep3)) >> 31); -} /* Evaluate each pixel in a block, generate a mask and possibly render * the quad: */ static void do_block( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, + const struct lp_rast_triangle *tri, int x, int y, int c1, int c2, int c3 ) { - const int step = 2 ; - - const int xstep1 = -step * tri->dy12; - const int xstep2 = -step * tri->dy23; - const int xstep3 = -step * tri->dy31; - - const int ystep1 = step * tri->dx12; - const int ystep2 = step * tri->dx23; - const int ystep3 = step * tri->dx31; + int i; + unsigned mask = 0; - int ix, iy; - uint32_t ALIGN16_ATTRIB mask[4][4]; - - - for (iy = 0; iy < 4; iy += 2) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; - - for (ix = 0; ix < 2; ix ++) { - - do_quad(tri, cx1, cx2, cx3, (int32_t *)mask[iy+ix]); - - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } + for (i = 0; i < 16; i++) + mask |= (~(((c1 + tri->step[0][i]) | + (c2 + tri->step[1][i]) | + (c3 + tri->step[2][i])) >> 31)) & (1 << i); + /* As we do trivial reject already, masks should rarely be all * zero: */ - lp_rast_shade_quads(rast, &tri->inputs, x, y, &mask[0][0] ); + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 6c9f75e90ce..a5a0407a57e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -359,6 +359,32 @@ do_triangle_ccw(struct 
setup_context *setup, maxx = tri->maxx / TILESIZE; maxy = tri->maxy / TILESIZE; + { + int xstep1 = -tri->dy12; + int xstep2 = -tri->dy23; + int xstep3 = -tri->dy31; + + int ystep1 = tri->dx12; + int ystep2 = tri->dx23; + int ystep3 = tri->dx31; + + int ix, iy; + int qx, qy; + int i = 0; + + for (qy = 0; qy < 4; qy += 2) { + for (qx = 0; qx < 4; qx += 2) { + for (iy = 0; iy < 2; iy++) { + for (ix = 0; ix < 2; ix++, i++) { + tri->step[0][i] = (xstep1 * (qx+ix)) + (ystep1 * (qy+iy)); + tri->step[1][i] = (xstep2 * (qx+ix)) + (ystep2 * (qy+iy)); + tri->step[2][i] = (xstep3 * (qx+ix)) + (ystep3 * (qy+iy)); + } + } + } + } + } + /* Convert to tile coordinates: */ if (miny == maxy && minx == maxx) -- cgit v1.2.3 From 7b116e13a2aa28a699e30c907c1b1ae5e04cab28 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 03:17:17 +0100 Subject: llvmpipe: pass mask as a linear encoding of the 4x4 block --- src/gallium/drivers/llvmpipe/lp_rast.c | 41 ++++++++++++++++++++++++----- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 15 ++++------- 2 files changed, 40 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 39fb8cdb6ba..6fd6acc0fa4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -216,16 +216,45 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; - uint32_t ALIGN16_ATTRIB masks[16]; - unsigned ix, iy, i; + uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; + unsigned ix, iy; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); - /* mask */ - for (i = 0; i < 16; ++i) - masks[i] = mask & (1 << i) ? ~0 : 0; + /* mask: the rasterizer wants to treat pixels in 4x4 blocks, but + * the pixel shader wants to swizzle them into 4 2x2 quads. 
+ * + * Additionally, the pixel shader wants masks as full dword ~0, + * while the rasterizer wants to pack per-pixel bits tightly. + */ +#if 0 + unsigned qx, qy; + for (qy = 0; qy < 2; ++qy) + for (qx = 0; qx < 2; ++qx) + for (iy = 0; iy < 2; ++iy) + for (ix = 0; ix < 2; ++ix) + masks[qy][qx][iy][ix] = mask & (1 << (qy*8+iy*4+qx*2+ix)) ? ~0 : 0; +#else + masks[0][0][0][0] = mask & (1 << (0*8+0*4+0*2+0)) ? ~0 : 0; + masks[0][0][0][1] = mask & (1 << (0*8+0*4+0*2+1)) ? ~0 : 0; + masks[0][0][1][0] = mask & (1 << (0*8+1*4+0*2+0)) ? ~0 : 0; + masks[0][0][1][1] = mask & (1 << (0*8+1*4+0*2+1)) ? ~0 : 0; + masks[0][1][0][0] = mask & (1 << (0*8+0*4+1*2+0)) ? ~0 : 0; + masks[0][1][0][1] = mask & (1 << (0*8+0*4+1*2+1)) ? ~0 : 0; + masks[0][1][1][0] = mask & (1 << (0*8+1*4+1*2+0)) ? ~0 : 0; + masks[0][1][1][1] = mask & (1 << (0*8+1*4+1*2+1)) ? ~0 : 0; + + masks[1][0][0][0] = mask & (1 << (1*8+0*4+0*2+0)) ? ~0 : 0; + masks[1][0][0][1] = mask & (1 << (1*8+0*4+0*2+1)) ? ~0 : 0; + masks[1][0][1][0] = mask & (1 << (1*8+1*4+0*2+0)) ? ~0 : 0; + masks[1][0][1][1] = mask & (1 << (1*8+1*4+0*2+1)) ? ~0 : 0; + masks[1][1][0][0] = mask & (1 << (1*8+0*4+1*2+0)) ? ~0 : 0; + masks[1][1][0][1] = mask & (1 << (1*8+0*4+1*2+1)) ? ~0 : 0; + masks[1][1][1][0] = mask & (1 << (1*8+1*4+1*2+0)) ? ~0 : 0; + masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? 
~0 : 0; +#endif ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -251,7 +280,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, inputs->a0, inputs->dadx, inputs->dady, - &masks[0], + &masks[0][0][0][0], color, depth); #else diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index a5a0407a57e..cf8643fc631 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -369,18 +369,13 @@ do_triangle_ccw(struct setup_context *setup, int ystep3 = tri->dx31; int ix, iy; - int qx, qy; int i = 0; - for (qy = 0; qy < 4; qy += 2) { - for (qx = 0; qx < 4; qx += 2) { - for (iy = 0; iy < 2; iy++) { - for (ix = 0; ix < 2; ix++, i++) { - tri->step[0][i] = (xstep1 * (qx+ix)) + (ystep1 * (qy+iy)); - tri->step[1][i] = (xstep2 * (qx+ix)) + (ystep2 * (qy+iy)); - tri->step[2][i] = (xstep3 * (qx+ix)) + (ystep3 * (qy+iy)); - } - } + for (iy = 0; iy < 4; iy++) { + for (ix = 0; ix < 4; ix++, i++) { + tri->step[0][i] = xstep1 * ix + ystep1 * iy; + tri->step[1][i] = xstep2 * ix + ystep2 * iy; + tri->step[2][i] = xstep3 * ix + ystep3 * iy; } } } -- cgit v1.2.3 From 1735325a23156b330c2281c91aec4a9b39ecbad9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 03:38:07 +0100 Subject: llvmpipe: recursive rasterization within a tile --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 174 ++++++++++++++++------------- 1 file changed, 98 insertions(+), 76 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index b5a3753a881..567e2231682 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -40,7 +40,7 @@ /* Render a 4x4 unmasked block: */ -static void block_full( struct lp_rasterizer *rast, +static void block_full_4( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { @@ -50,17 +50,30 @@ static void block_full( struct 
lp_rasterizer *rast, } +static void block_full_16( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y ) +{ + unsigned mask = ~0; + unsigned ix, iy; + + for (iy = 0; iy < 16; iy+=4) + for (ix = 0; ix < 16; ix+=4) + lp_rast_shade_quads(rast, &tri->inputs, x + ix, y + iy , mask); +} + + /* Evaluate each pixel in a block, generate a mask and possibly render * the quad: */ static void -do_block( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, - int c2, - int c3 ) +do_block_4( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y, + int c1, + int c2, + int c3 ) { int i; unsigned mask = 0; @@ -74,10 +87,54 @@ do_block( struct lp_rasterizer *rast, /* As we do trivial reject already, masks should rarely be all * zero: */ - lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); + if (mask) + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); } +static void +do_block_16( struct lp_rasterizer *rast, + const struct lp_rast_triangle *tri, + int x, int y, + int c1, + int c2, + int c3 ) +{ + int ix,iy,i = 0; + + int ei1 = tri->ei1 << 2; + int ei2 = tri->ei2 << 2; + int ei3 = tri->ei3 << 2; + + int eo1 = tri->eo1 << 2; + int eo2 = tri->eo2 << 2; + int eo3 = tri->eo3 << 2; + for (iy = 0; iy < 16; iy+=4) + { + for (ix = 0; ix < 16; ix+=4, i++) + { + int cx1 = c1 + (tri->step[0][i] << 2); + int cx2 = c2 + (tri->step[1][i] << 2); + int cx3 = c3 + (tri->step[2][i] << 2); + + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + block_full_4(rast, tri, x+ix, y+iy); /* trivial accept */ + } + else + { + do_block_4(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + } + } + } +} /* Scan the tile in chunks and figure out which pixels to rasterize * for this triangle: @@ -87,84 +144,49 @@ void lp_rast_triangle( struct lp_rasterizer *rast, { const struct lp_rast_triangle *tri = arg.triangle; - const int step = BLOCKSIZE; + 
int x = rast->x; + int y = rast->y; + int ix,iy,i = 0; - int ei1 = tri->ei1 * step; - int ei2 = tri->ei2 * step; - int ei3 = tri->ei3 * step; + int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; + int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; + int c3 = tri->c3 + tri->dx31 * y - tri->dy31 * x; - int eo1 = tri->eo1 * step; - int eo2 = tri->eo2 * step; - int eo3 = tri->eo3 * step; + int ei1 = tri->ei1 << 4; + int ei2 = tri->ei2 << 4; + int ei3 = tri->ei3 << 4; - int xstep1 = -step * tri->dy12; - int xstep2 = -step * tri->dy23; - int xstep3 = -step * tri->dy31; - - int ystep1 = step * tri->dx12; - int ystep2 = step * tri->dx23; - int ystep3 = step * tri->dx31; - - /* Clamp to tile dimensions: - */ - int minx = MAX2(tri->minx, rast->x); - int miny = MAX2(tri->miny, rast->y); - int maxx = MIN2(tri->maxx, rast->x + TILE_SIZE); - int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE); - - int x, y; - int c1, c2, c3; + int eo1 = tri->eo1 << 4; + int eo2 = tri->eo2 << 4; + int eo3 = tri->eo3 << 4; debug_printf("%s\n", __FUNCTION__); - if (miny == maxy || minx == maxx) { - debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__); - return; - } - - minx &= ~(BLOCKSIZE-1); - miny &= ~(BLOCKSIZE-1); - c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx; - c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; - c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; - - for (y = miny; y < maxy; y += BLOCKSIZE) + for (iy = 0; iy < 64; iy+=16) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; - - for (x = minx; x < maxx; x += BLOCKSIZE) + for (ix = 0; ix < 64; ix+=16, i++) { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { - block_full(rast, tri, x, y); /* trivial accept */ - } - else - { - do_block(rast, tri, x, y, cx1, cx2, cx3); - } - - /* Iterate cx values across the region: - */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; + int cx1 = c1 + (tri->step[0][i] << 4); + int cx2 = c2 + 
(tri->step[1][i] << 4); + int cx3 = c3 + (tri->step[2][i] << 4); + + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + block_full_16(rast, tri, x+ix, y+iy); /* trivial accept */ + } + else + { + do_block_16(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + } } - - /* Iterate c values down the region: - */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; } } -- cgit v1.2.3 From 8d752a20c6f70b442ac2210cce0fd001499be5f6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 08:56:58 +0100 Subject: llvmpipe: build list of 4x4 blocks to be shaded --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 38 ++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 567e2231682..12ac840ef24 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -36,30 +36,33 @@ #define BLOCKSIZE 4 - +static struct { + int x; + int y; + unsigned mask; +} blocks[256]; +static int nr_blocks; /* Render a 4x4 unmasked block: */ static void block_full_4( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, int x, int y ) { - unsigned mask = ~0; - - lp_rast_shade_quads(rast, &tri->inputs, x, y, mask); + blocks[nr_blocks].x = x; + blocks[nr_blocks].y = y; + blocks[nr_blocks].mask = ~0; + nr_blocks++; } static void block_full_16( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, int x, int y ) { - unsigned mask = ~0; unsigned ix, iy; for (iy = 0; iy < 16; iy+=4) for (ix = 0; ix < 16; ix+=4) - lp_rast_shade_quads(rast, &tri->inputs, x + ix, y + iy , mask); + block_full_4(rast, x + ix, y + iy); } @@ -87,8 +90,12 @@ do_block_4( struct lp_rasterizer *rast, /* As we do trivial reject already, masks should rarely be all * zero: */ - if (mask) - lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); + 
if (mask) { + blocks[nr_blocks].x = x; + blocks[nr_blocks].y = y; + blocks[nr_blocks].mask = mask; + nr_blocks++; + } } static void @@ -126,7 +133,7 @@ do_block_16( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { - block_full_4(rast, tri, x+ix, y+iy); /* trivial accept */ + block_full_4(rast, x+ix, y+iy); /* trivial accept */ } else { @@ -162,6 +169,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, debug_printf("%s\n", __FUNCTION__); + nr_blocks = 0; for (iy = 0; iy < 64; iy+=16) { @@ -180,7 +188,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { - block_full_16(rast, tri, x+ix, y+iy); /* trivial accept */ + block_full_16(rast, x+ix, y+iy); /* trivial accept */ } else { @@ -188,5 +196,11 @@ void lp_rast_triangle( struct lp_rasterizer *rast, } } } + + for (i = 0; i < nr_blocks; i++) + lp_rast_shade_quads(rast, &tri->inputs, + blocks[i].x, + blocks[i].y, + blocks[i].mask); } -- cgit v1.2.3 From 3199c6e764c20c69a76c561b9f4b89a23e5a97f5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 09:44:23 +0100 Subject: llvmpipe: move block list into rast struct --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 7 ++++++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 35 +++++++++++++---------------- 2 files changed, 22 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 2333729807e..323c046cf46 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -74,6 +74,13 @@ struct lp_rasterizer { unsigned clear_depth; char clear_stencil; } state; + + int nr_blocks; + struct { + unsigned x; + unsigned y; + unsigned mask; + } blocks[256]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 12ac840ef24..174e6ab19b3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ 
b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -36,22 +36,17 @@ #define BLOCKSIZE 4 -static struct { - int x; - int y; - unsigned mask; -} blocks[256]; -static int nr_blocks; /* Render a 4x4 unmasked block: */ static void block_full_4( struct lp_rasterizer *rast, int x, int y ) { - blocks[nr_blocks].x = x; - blocks[nr_blocks].y = y; - blocks[nr_blocks].mask = ~0; - nr_blocks++; + int i = rast->nr_blocks; + rast->blocks[i].x = x; + rast->blocks[i].y = y; + rast->blocks[i].mask = ~0; + rast->nr_blocks++; } @@ -86,15 +81,15 @@ do_block_4( struct lp_rasterizer *rast, (c2 + tri->step[1][i]) | (c3 + tri->step[2][i])) >> 31)) & (1 << i); - /* As we do trivial reject already, masks should rarely be all * zero: */ if (mask) { - blocks[nr_blocks].x = x; - blocks[nr_blocks].y = y; - blocks[nr_blocks].mask = mask; - nr_blocks++; + int i = rast->nr_blocks; + rast->blocks[i].x = x; + rast->blocks[i].y = y; + rast->blocks[i].mask = mask; + rast->nr_blocks++; } } @@ -169,7 +164,7 @@ void lp_rast_triangle( struct lp_rasterizer *rast, debug_printf("%s\n", __FUNCTION__); - nr_blocks = 0; + rast->nr_blocks = 0; for (iy = 0; iy < 64; iy+=16) { @@ -197,10 +192,10 @@ void lp_rast_triangle( struct lp_rasterizer *rast, } } - for (i = 0; i < nr_blocks; i++) + for (i = 0; i < rast->nr_blocks; i++) lp_rast_shade_quads(rast, &tri->inputs, - blocks[i].x, - blocks[i].y, - blocks[i].mask); + rast->blocks[i].x, + rast->blocks[i].y, + rast->blocks[i].mask); } -- cgit v1.2.3 From cccb1842092bd1b9f35aee0ac21d580c0365e4a5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 10:11:52 +0100 Subject: llvmpipe: minor opts to setup_tri --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 66 +++++++++++++---------------- 1 file changed, 30 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index cf8643fc631..89d75710dda 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -221,8 +221,6 @@ static void setup_tri_coefficients( struct setup_context *setup, -/* XXX: do this by add/subtracting a large floating point number: - */ static inline int subpixel_snap( float a ) { return util_iround(FIXED_ONE * a); @@ -235,15 +233,6 @@ static INLINE void bin_triangle( struct cmd_block_list *list, } -/* to avoid having to allocate power-of-four, square render targets, - * end up having a specialized version of the above that runs only at - * the topmost level. - * - * at the topmost level there may be an arbitary number of steps on - * either dimension, so this loop needs to be either separately - * code-generated and unrolled for each render target size, or kept as - * generic looping code: - */ #define MIN3(a,b,c) MIN2(MIN2(a,b),c) #define MAX3(a,b,c) MAX2(MAX2(a,b),c) @@ -354,11 +343,6 @@ do_triangle_ccw(struct setup_context *setup, tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; - minx = tri->minx / TILESIZE; - miny = tri->miny / TILESIZE; - maxx = tri->maxx / TILESIZE; - maxy = tri->maxy / TILESIZE; - { int xstep1 = -tri->dy12; int xstep2 = -tri->dy23; @@ -370,16 +354,37 @@ do_triangle_ccw(struct setup_context *setup, int ix, iy; int i = 0; + + int c1 = 0; + int c2 = 0; + int c3 = 0; for (iy = 0; iy < 4; iy++) { + int cx1 = c1; + int cx2 = c2; + int cx3 = c3; + for (ix = 0; ix < 4; ix++, i++) { - tri->step[0][i] = xstep1 * ix + ystep1 * iy; - tri->step[1][i] = xstep2 * ix + ystep2 * iy; - tri->step[2][i] = xstep3 * ix + ystep3 * iy; + tri->step[0][i] = cx1; + tri->step[1][i] = cx2; + tri->step[2][i] = cx3; + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; } + + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; } } + minx = tri->minx / TILESIZE; + miny = tri->miny / TILESIZE; + maxx = tri->maxx / TILESIZE; + maxy = tri->maxy / TILESIZE; + + /* Convert to tile coordinates: */ if (miny == maxy && minx == maxx) @@ -419,10 +424,7 @@ do_triangle_ccw(struct 
setup_context *setup, int x, y; - /* Subdivide space into NxM blocks, where each block is square and - * power-of-four in dimension. - * - * Trivially accept or reject blocks, else jump to per-pixel + /* Trivially accept or reject blocks, else jump to per-pixel * examination above. */ for (y = miny; y <= maxy; y++) @@ -430,38 +432,30 @@ do_triangle_ccw(struct setup_context *setup, int cx1 = c1; int cx2 = c2; int cx3 = c3; + int in = 0; for (x = minx; x <= maxx; x++) { - assert(cx1 == - tri->c1 + - tri->dx12 * y * TILESIZE - - tri->dy12 * x * TILESIZE); - assert(cx2 == - tri->c2 + - tri->dx23 * y * TILESIZE - - tri->dy23 * x * TILESIZE); - assert(cx3 == - tri->c3 + - tri->dx31 * y * TILESIZE - - tri->dy31 * x * TILESIZE); - if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || cx3 + eo3 < 0) { /* do nothing */ + if (in) + break; } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && cx3 + ei3 > 0) { + in = 1; /* shade whole tile */ bin_command( &setup->tile[x][y], lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); } else { + in = 1; /* shade partial tile */ bin_command( &setup->tile[x][y], lp_rast_triangle, -- cgit v1.2.3 From 341edde1d2e9f9f989d41869cc436b51942941e4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 Oct 2009 14:35:54 +0100 Subject: llvmpipe: remove dead code --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 89d75710dda..041716adc93 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -227,13 +227,6 @@ static inline int subpixel_snap( float a ) } -static INLINE void bin_triangle( struct cmd_block_list *list, - const struct lp_rast_triangle arg ) -{ -} - - - #define MIN3(a,b,c) MIN2(MIN2(a,b),c) #define MAX3(a,b,c) MAX2(MAX2(a,b),c) -- cgit v1.2.3 From 694f05ac18c54253910678709f2dd35c36f1e912 Mon Sep 17 00:00:00 2001 From: Keith Whitwell 
Date: Wed, 21 Oct 2009 15:21:11 +0100 Subject: llvmpipe: remove one of two definitions of TILESIZE --- src/gallium/drivers/llvmpipe/lp_rast.c | 8 ++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 2 -- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++---- src/gallium/drivers/llvmpipe/lp_setup_context.h | 5 +++-- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 ++++++++++---------- src/gallium/drivers/llvmpipe/lp_tile_soa.h | 3 ++- 6 files changed, 23 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6fd6acc0fa4..6e94e22e5b2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -71,8 +71,8 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, rast->state.write_zstencil = write_zstencil; rast->state.write_color = write_color; - rast->check_for_clipped_tiles = (width % TILESIZE != 0 || - height % TILESIZE != 0); + rast->check_for_clipped_tiles = (width % TILE_SIZE != 0 || + height % TILE_SIZE != 0); if (cbuf) { rast->cbuf_transfer = screen->get_tex_transfer(rast->screen, @@ -311,8 +311,8 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) { const unsigned x = rast->x; const unsigned y = rast->y; - unsigned w = TILESIZE; - unsigned h = TILESIZE; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; if (x + w > rast->width) w -= x + w - rast->width; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 318bf73b159..282b9a46d1b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -41,8 +41,6 @@ struct pipe_screen; #define FIXED_ORDER 4 #define FIXED_ONE (1<<FIXED_ORDER) [... text lost in extraction: the remainder of this lp_rast.h hunk and the header of the lp_setup.c diff ...] for (j = 0; j < setup->tiles_y; j++) { lp_rast_start_tile( rast, - i * TILESIZE, - j * TILESIZE ); + i * TILE_SIZE, + j * TILE_SIZE ); for (block = setup->tile[i][j].head; block; block = block->next) { for (k = 0; k < block->count; k++) { @@ -241,8 +241,8 @@ begin_binning( struct setup_context *setup
) setup->fb.zsbuf->height); } - setup->tiles_x = align(setup->fb.width, TILESIZE) / TILESIZE; - setup->tiles_y = align(setup->fb.height, TILESIZE) / TILESIZE; + setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; + setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index d91ffc7c20a..938f6ce262d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -30,14 +30,15 @@ #include "lp_setup.h" #include "lp_rast.h" +#include "lp_tile_soa.h" /* for TILE_SIZE */ /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. */ #define MAXHEIGHT 2048 #define MAXWIDTH 2048 -#define TILES_X (MAXWIDTH / TILESIZE) -#define TILES_Y (MAXHEIGHT / TILESIZE) +#define TILES_X (MAXWIDTH / TILE_SIZE) +#define TILES_Y (MAXHEIGHT / TILE_SIZE) #define CMD_BLOCK_MAX 128 #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 041716adc93..f2665c11df3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -372,10 +372,10 @@ do_triangle_ccw(struct setup_context *setup, } } - minx = tri->minx / TILESIZE; - miny = tri->miny / TILESIZE; - maxx = tri->maxx / TILESIZE; - maxy = tri->maxy / TILESIZE; + minx = tri->minx / TILE_SIZE; + miny = tri->miny / TILE_SIZE; + maxx = tri->maxx / TILE_SIZE; + maxy = tri->maxy / TILE_SIZE; /* Convert to tile coordinates: @@ -390,14 +390,14 @@ do_triangle_ccw(struct setup_context *setup, else { int c1 = (tri->c1 + - tri->dx12 * miny * TILESIZE - - tri->dy12 * minx * TILESIZE); + tri->dx12 * miny * TILE_SIZE - + tri->dy12 * minx * TILE_SIZE); int c2 = (tri->c2 + - 
tri->dx23 * miny * TILESIZE - - tri->dy23 * minx * TILESIZE); + tri->dx23 * miny * TILE_SIZE - + tri->dy23 * minx * TILE_SIZE); int c3 = (tri->c3 + - tri->dx31 * miny * TILESIZE - - tri->dy31 * minx * TILESIZE); + tri->dx31 * miny * TILE_SIZE - + tri->dy31 * minx * TILE_SIZE); int ei1 = tri->ei1 << TILE_ORDER; int ei2 = tri->ei2 << TILE_ORDER; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index d72d6d2ef15..0e874ce4515 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -43,7 +43,8 @@ struct pipe_transfer; /** * Cache tile size (width and height). This needs to be a power of two. */ -#define TILE_SIZE 64 +#define TILE_ORDER 6 +#define TILE_SIZE (1< Date: Thu, 22 Oct 2009 17:21:37 +0100 Subject: llvmpipe: fix the worst of the depth regressions since switch to 4x4 --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6e94e22e5b2..32cd5e09f54 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -265,7 +265,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /* depth buffer */ assert((x % 2) == 0); assert((y % 2) == 0); - depth = tile->depth + iy*TILE_SIZE + 2*ix; + depth = tile->depth + (iy/4)*(16*16) + (ix/4)*16; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(masks, 16)); -- cgit v1.2.3 From bfa1a766d6df39963daf54fbc63a84d9c139ec7e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 30 Nov 2009 10:43:23 -0700 Subject: llvmpipe: update C_SOURCES --- src/gallium/drivers/llvmpipe/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 8f05e5a6fd1..bfe34396d95 100644 --- 
a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -33,7 +33,10 @@ C_SOURCES = \ lp_flush.c \ lp_jit.c \ lp_prim_vbuf.c \ + lp_rast.c \ + lp_rast_tri.c \ lp_setup.c \ + lp_setup_tri.c \ lp_query.c \ lp_screen.c \ lp_state_blend.c \ @@ -46,8 +49,6 @@ C_SOURCES = \ lp_state_vertex.c \ lp_state_vs.c \ lp_surface.c \ - lp_tex_cache.c \ - lp_tex_sample_c.c \ lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_soa.c -- cgit v1.2.3 From 7d042ac2a285c220a396d91a6dbe5c7f4e697c71 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 30 Nov 2009 14:01:31 -0700 Subject: llvmpipe: minor refactoring of bin rasterization code --- src/gallium/drivers/llvmpipe/lp_setup.c | 44 +++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6d841474680..70912323501 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -170,13 +170,35 @@ static void bin_everywhere( struct setup_context *setup, } +/** Rasterize commands for a single bin */ +static void +rasterize_bin( struct lp_rasterizer *rast, + struct cmd_block_list *commands, + int x, int y) +{ + struct cmd_block *block; + unsigned k; + + lp_rast_start_tile( rast, x, y ); + + /* simply execute each of the commands in the block list */ + for (block = commands->head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + block->cmd[k]( rast, block->arg[k] ); + } + } + + lp_rast_end_tile( rast ); +} + + +/** Rasterize all tile's bins */ static void rasterize_bins( struct setup_context *setup, boolean write_depth ) { struct lp_rasterizer *rast = setup->rast; - struct cmd_block *block; - unsigned i,j,k; + unsigned i, j; SETUP_DEBUG("%s\n", __FUNCTION__); @@ -187,23 +209,13 @@ rasterize_bins( struct setup_context *setup, setup->fb.zsbuf != NULL && write_depth, setup->fb.width, setup->fb.height ); - - + /* 
loop over tile bins, rasterize each */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - - lp_rast_start_tile( rast, - i * TILE_SIZE, - j * TILE_SIZE ); - - for (block = setup->tile[i][j].head; block; block = block->next) { - for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, block->arg[k] ); - } - } - - lp_rast_end_tile( rast ); + rasterize_bin( rast, &setup->tile[i][j], + i * TILE_SIZE, + j * TILE_SIZE ); } } -- cgit v1.2.3 From 7505510c7b7c33f3c571647c0398da7e1b823806 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 30 Nov 2009 14:02:01 -0700 Subject: llvmpipe: add a bunch of comments --- src/gallium/drivers/llvmpipe/lp_rast.c | 30 ++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_rast.h | 5 +++-- src/gallium/drivers/llvmpipe/lp_setup_context.h | 6 +++++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 26 ++++++++++++--------- 4 files changed, 54 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 32cd5e09f54..09495f6288e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -51,6 +51,10 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) } +/** + * Begin the rasterization phase. + * Map the framebuffer surfaces. Initialize the 'rast' state. + */ boolean lp_rast_begin( struct lp_rasterizer *rast, struct pipe_surface *cbuf, struct pipe_surface *zsbuf, @@ -95,6 +99,10 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, } +/** + * Finish the rasterization phase. + * Unmap framebuffer surfaces. + */ void lp_rast_end( struct lp_rasterizer *rast ) { struct pipe_screen *screen = rast->screen; @@ -120,7 +128,10 @@ void lp_rast_end( struct lp_rasterizer *rast ) -/* Begining of each tile: +/** + * Begining rasterization of a tile. 
+ * \param x window X position of the tile, in pixels + * \param y window Y position of the tile, in pixels */ void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, @@ -132,6 +143,10 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, rast->y = y; } + +/** + * Clear the rasterizer's current color tile. + */ void lp_rast_clear_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { @@ -157,6 +172,10 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, } } + +/** + * Clear the rasterizer's current z/stencil tile. + */ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) { @@ -307,6 +326,9 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, */ +/** + * Write the rasterizer's color tile to the framebuffer. + */ static void lp_rast_store_color( struct lp_rasterizer *rast ) { const unsigned x = rast->x; @@ -331,6 +353,9 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) } +/** + * Write the rasterizer's z/stencil tile to the framebuffer. + */ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) { RAST_DEBUG("%s\n", __FUNCTION__); @@ -339,6 +364,9 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) } +/** + * Write the rasterizer's tiles to the framebuffer. 
+ */ void lp_rast_end_tile( struct lp_rasterizer *rast ) { RAST_DEBUG("%s\n", __FUNCTION__); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 282b9a46d1b..a50b73b27f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -79,6 +79,7 @@ struct lp_rast_shader_inputs { * plus inputs to run the shader: */ struct lp_rast_triangle { + /* bounding box of tri (in pixels) */ int minx; int maxx; int miny; @@ -94,12 +95,12 @@ struct lp_rast_triangle { int eo2; int eo3; - /* y deltas for vertex pairs */ + /* y deltas for vertex pairs (in fixed pt) */ int dy12; int dy23; int dy31; - /* x deltas for vertex pairs */ + /* x deltas for vertex pairs (in fixed pt) */ int dx12; int dx23; int dx31; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 938f6ce262d..3209e41c01f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -87,6 +87,7 @@ struct setup_context { struct cmd_block_list tile[TILES_X][TILES_Y]; struct data_block_list data; + /* size of framebuffer, in tiles */ unsigned tiles_x; unsigned tiles_y; @@ -154,6 +155,11 @@ void lp_setup_choose_point( struct setup_context *setup ); void lp_setup_new_data_block( struct data_block_list *list ); void lp_setup_new_cmd_block( struct cmd_block_list *list ); + +/** + * Allocate space for a command/data in the given block list. + * Grow the block list if needed. 
+ */ static INLINE void *get_data( struct data_block_list *list, unsigned size) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index f2665c11df3..cf862554067 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -230,6 +230,11 @@ static inline int subpixel_snap( float a ) #define MIN3(a,b,c) MIN2(MIN2(a,b),c) #define MAX3(a,b,c) MAX2(MAX2(a,b),c) +/** + * Do basic setup for triangle rasterization and determine which + * framebuffer tiles are touched. Put the triangle in the bins for the + * tiles which we overlap. + */ static void do_triangle_ccw(struct setup_context *setup, const float (*v1)[4], @@ -237,15 +242,14 @@ do_triangle_ccw(struct setup_context *setup, const float (*v3)[4], boolean frontfacing ) { - + /* x/y positions in fixed point */ + const int x1 = subpixel_snap(v1[0][0]); + const int x2 = subpixel_snap(v2[0][0]); + const int x3 = subpixel_snap(v3[0][0]); const int y1 = subpixel_snap(v1[0][1]); const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - const int x1 = subpixel_snap(v1[0][0]); - const int x2 = subpixel_snap(v2[0][0]); - const int x3 = subpixel_snap(v3[0][0]); - struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); float area; int minx, maxx, miny, maxy; @@ -270,7 +274,7 @@ do_triangle_ccw(struct setup_context *setup, return; } - // Bounding rectangle + /* Bounding rectangle (in pixels) */ tri->minx = (MIN3(x1, x2, x3) + 0xf) >> FIXED_ORDER; tri->maxx = (MAX3(x1, x2, x3) + 0xf) >> FIXED_ORDER; tri->miny = (MIN3(y1, y2, y3) + 0xf) >> FIXED_ORDER; @@ -372,13 +376,14 @@ do_triangle_ccw(struct setup_context *setup, } } + /* Convert to tile coordinates: + */ minx = tri->minx / TILE_SIZE; miny = tri->miny / TILE_SIZE; maxx = tri->maxx / TILE_SIZE; maxy = tri->maxy / TILE_SIZE; - - /* Convert to tile coordinates: + /* Determine which tile(s) intersect the triangle's bounding box */ if (miny == 
maxy && minx == maxx) { @@ -442,8 +447,9 @@ do_triangle_ccw(struct setup_context *setup, cx3 + ei3 > 0) { in = 1; - /* shade whole tile */ - bin_command( &setup->tile[x][y], lp_rast_shade_tile, + /* triangle covers the whole tile- shade whole tile */ + bin_command( &setup->tile[x][y], + lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); } else -- cgit v1.2.3 From 63b1f23b3eb6fceaff7c2ceed925ef57f63f9fa2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 15:43:01 -0700 Subject: llvmpipe: comments, reformatting and assertions in tri rast code --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 117 +++++++++++++++++------------ 1 file changed, 70 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 174e6ab19b3..428870bb041 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,12 +37,16 @@ #define BLOCKSIZE 4 -/* Render a 4x4 unmasked block: +/** + * Add a 4x4 block of pixels to the block list. + * All pixels are known to be inside the triangle's bounds. */ -static void block_full_4( struct lp_rasterizer *rast, - int x, int y ) +static void +block_full_4( struct lp_rasterizer *rast, int x, int y ) { int i = rast->nr_blocks; + assert(x % 4 == 0); + assert(y % 4 == 0); rast->blocks[i].x = x; rast->blocks[i].y = y; rast->blocks[i].mask = ~0; @@ -50,20 +54,26 @@ static void block_full_4( struct lp_rasterizer *rast, } -static void block_full_16( struct lp_rasterizer *rast, - int x, int y ) +/** + * Add a 16x16 block of pixels to the block list. + * All pixels are known to be inside the triangle's bounds. 
+ */ +static void +block_full_16( struct lp_rasterizer *rast, int x, int y ) { unsigned ix, iy; - - for (iy = 0; iy < 16; iy+=4) - for (ix = 0; ix < 16; ix+=4) + assert(x % 16 == 0); + assert(y % 16 == 0); + for (iy = 0; iy < 16; iy += 4) + for (ix = 0; ix < 16; ix += 4) block_full_4(rast, x + ix, y + iy); } - -/* Evaluate each pixel in a block, generate a mask and possibly render - * the quad: +/** + * Evaluate each pixel in a 4x4 block to determine if it lies within + * the triangle's bounds. + * Generate a mask of in/out flags and add the block to the blocks list. */ static void do_block_4( struct lp_rasterizer *rast, @@ -76,13 +86,15 @@ do_block_4( struct lp_rasterizer *rast, int i; unsigned mask = 0; + assert(x % 4 == 0); + assert(y % 4 == 0); + for (i = 0; i < 16; i++) mask |= (~(((c1 + tri->step[0][i]) | (c2 + tri->step[1][i]) | (c3 + tri->step[2][i])) >> 31)) & (1 << i); - /* As we do trivial reject already, masks should rarely be all - * zero: + /* As we do trivial reject already, masks should rarely be all zero: */ if (mask) { int i = rast->nr_blocks; @@ -93,15 +105,20 @@ do_block_4( struct lp_rasterizer *rast, } } + +/** + * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out + * of the triangle's bounds. 
+ */ static void do_block_16( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, - int c2, - int c3 ) + const struct lp_rast_triangle *tri, + int x, int y, + int c1, + int c2, + int c3 ) { - int ix,iy,i = 0; + int ix, iy, i = 0; int ei1 = tri->ei1 << 2; int ei2 = tri->ei2 << 2; @@ -111,44 +128,48 @@ do_block_16( struct lp_rasterizer *rast, int eo2 = tri->eo2 << 2; int eo3 = tri->eo3 << 2; - for (iy = 0; iy < 16; iy+=4) - { - for (ix = 0; ix < 16; ix+=4, i++) - { + assert(x % 16 == 0); + assert(y % 16 == 0); + + for (iy = 0; iy < 16; iy+=4) { + for (ix = 0; ix < 16; ix+=4, i++) { int cx1 = c1 + (tri->step[0][i] << 2); int cx2 = c2 + (tri->step[1][i] << 2); int cx3 = c3 + (tri->step[2][i] << 2); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { - block_full_4(rast, x+ix, y+iy); /* trivial accept */ + cx3 + ei3 > 0) { + /* the block is completely inside the triangle */ + block_full_4(rast, x+ix, y+iy); } - else - { + else { + /* the block is partially in/out of the triangle */ do_block_4(rast, tri, x+ix, y+iy, cx1, cx2, cx3); } } } } -/* Scan the tile in chunks and figure out which pixels to rasterize - * for this triangle: + +/** + * Scan the tile in chunks and figure out which pixels to rasterize + * for this triangle. 
*/ -void lp_rast_triangle( struct lp_rasterizer *rast, - const union lp_rast_cmd_arg arg ) +void +lp_rast_triangle( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg arg ) { const struct lp_rast_triangle *tri = arg.triangle; int x = rast->x; int y = rast->y; - int ix,iy,i = 0; + int ix, iy, i = 0; int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; @@ -166,36 +187,38 @@ void lp_rast_triangle( struct lp_rasterizer *rast, rast->nr_blocks = 0; - for (iy = 0; iy < 64; iy+=16) - { - for (ix = 0; ix < 64; ix+=16, i++) - { + /* Walk over the tile to build a list of 4x4 pixel blocks which will + * be filled/shaded. We do this at two granularities: 16x16 blocks + * and then 4x4 blocks. + */ + for (iy = 0; iy < TILE_SIZE; iy += 16) { + for (ix = 0; ix < TILE_SIZE; ix += 16, i++) { int cx1 = c1 + (tri->step[0][i] << 4); int cx2 = c2 + (tri->step[1][i] << 4); int cx3 = c3 + (tri->step[2][i] << 4); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { - block_full_16(rast, x+ix, y+iy); /* trivial accept */ + cx3 + ei3 > 0) { + /* the block is completely inside the triangle */ + block_full_16(rast, x+ix, y+iy); } - else - { + else { + /* the block is partially in/out of the triangle */ do_block_16(rast, tri, x+ix, y+iy, cx1, cx2, cx3); } } } + /* Shade the 4x4 pixel blocks */ for (i = 0; i < rast->nr_blocks; i++) lp_rast_shade_quads(rast, &tri->inputs, rast->blocks[i].x, rast->blocks[i].y, rast->blocks[i].mask); } - -- cgit v1.2.3 From 938acf0367416c989d49f231da855ba6ea8d64ca Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 16:00:04 -0700 Subject: llvmpipe: make nr_blocks unsigned --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'src') diff 
--git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 323c046cf46..723bb5ad695 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -75,7 +75,7 @@ struct lp_rasterizer { char clear_stencil; } state; - int nr_blocks; + unsigned nr_blocks; struct { unsigned x; unsigned y; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 428870bb041..07b0eccf1e7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -44,7 +44,7 @@ static void block_full_4( struct lp_rasterizer *rast, int x, int y ) { - int i = rast->nr_blocks; + const unsigned i = rast->nr_blocks; assert(x % 4 == 0); assert(y % 4 == 0); rast->blocks[i].x = x; @@ -97,7 +97,7 @@ do_block_4( struct lp_rasterizer *rast, /* As we do trivial reject already, masks should rarely be all zero: */ if (mask) { - int i = rast->nr_blocks; + const unsigned i = rast->nr_blocks; rast->blocks[i].x = x; rast->blocks[i].y = y; rast->blocks[i].mask = mask; @@ -169,7 +169,8 @@ lp_rast_triangle( struct lp_rasterizer *rast, int x = rast->x; int y = rast->y; - int ix, iy, i = 0; + int ix, iy; + unsigned i = 0; int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; -- cgit v1.2.3 From 6bde3e0fdfe4bfc3fee33ca0d1d8e13969eb8952 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 15:51:07 -0700 Subject: llvmpipe: replace shifts with multiplies to be clearer The compiler will still do the multiplies with shifts. It's just a bit easier to follow the logic with multiplies. 
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 07b0eccf1e7..9543b86ecd9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -120,22 +120,22 @@ do_block_16( struct lp_rasterizer *rast, { int ix, iy, i = 0; - int ei1 = tri->ei1 << 2; - int ei2 = tri->ei2 << 2; - int ei3 = tri->ei3 << 2; + int ei1 = tri->ei1 * 4; + int ei2 = tri->ei2 * 4; + int ei3 = tri->ei3 * 4; - int eo1 = tri->eo1 << 2; - int eo2 = tri->eo2 << 2; - int eo3 = tri->eo3 << 2; + int eo1 = tri->eo1 * 4; + int eo2 = tri->eo2 * 4; + int eo3 = tri->eo3 * 4; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy+=4) { for (ix = 0; ix < 16; ix+=4, i++) { - int cx1 = c1 + (tri->step[0][i] << 2); - int cx2 = c2 + (tri->step[1][i] << 2); - int cx3 = c3 + (tri->step[2][i] << 2); + int cx1 = c1 + (tri->step[0][i] * 4); + int cx2 = c2 + (tri->step[1][i] * 4); + int cx3 = c3 + (tri->step[2][i] * 4); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || @@ -176,13 +176,13 @@ lp_rast_triangle( struct lp_rasterizer *rast, int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; int c3 = tri->c3 + tri->dx31 * y - tri->dy31 * x; - int ei1 = tri->ei1 << 4; - int ei2 = tri->ei2 << 4; - int ei3 = tri->ei3 << 4; + int ei1 = tri->ei1 * 16; + int ei2 = tri->ei2 * 16; + int ei3 = tri->ei3 * 16; - int eo1 = tri->eo1 << 4; - int eo2 = tri->eo2 << 4; - int eo3 = tri->eo3 << 4; + int eo1 = tri->eo1 * 16; + int eo2 = tri->eo2 * 16; + int eo3 = tri->eo3 * 16; debug_printf("%s\n", __FUNCTION__); @@ -194,9 +194,9 @@ lp_rast_triangle( struct lp_rasterizer *rast, */ for (iy = 0; iy < TILE_SIZE; iy += 16) { for (ix = 0; ix < TILE_SIZE; ix += 16, i++) { - int cx1 = c1 + (tri->step[0][i] << 4); - int cx2 = c2 + (tri->step[1][i] << 4); - int cx3 = c3 + (tri->step[2][i] << 4); + int cx1 = 
c1 + (tri->step[0][i] * 16); + int cx2 = c2 + (tri->step[1][i] * 16); + int cx3 = c3 + (tri->step[2][i] * 16); if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || -- cgit v1.2.3 From 9c486774913f66c6496cd43cfd9dbd992c28d8cb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 16:24:16 -0700 Subject: llvmpipe: simplify mask computation Make this a little easier to understand. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 9543b86ecd9..f6cb628ed4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -89,10 +89,12 @@ do_block_4( struct lp_rasterizer *rast, assert(x % 4 == 0); assert(y % 4 == 0); - for (i = 0; i < 16; i++) - mask |= (~(((c1 + tri->step[0][i]) | - (c2 + tri->step[1][i]) | - (c3 + tri->step[2][i])) >> 31)) & (1 << i); + for (i = 0; i < 16; i++) { + int any_negative = ((c1 + tri->step[0][i]) | + (c2 + tri->step[1][i]) | + (c3 + tri->step[2][i])) >> 31; + mask |= (~any_negative) & (1 << i); + } /* As we do trivial reject already, masks should rarely be all zero: */ -- cgit v1.2.3 From 63fe997e28b5bfee1f776a220d121987a5fee62e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 1 Dec 2009 16:24:57 -0700 Subject: llvmpipe: added assertions And remove unused BLOCKSIZE. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index f6cb628ed4b..e772a0158a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -34,9 +34,6 @@ #include "lp_tile_soa.h" -#define BLOCKSIZE 4 - - /** * Add a 4x4 block of pixels to the block list. * All pixels are known to be inside the triangle's bounds. 
@@ -186,6 +183,8 @@ lp_rast_triangle( struct lp_rasterizer *rast, int eo2 = tri->eo2 * 16; int eo3 = tri->eo3 * 16; + assert(Elements(rast->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); + debug_printf("%s\n", __FUNCTION__); rast->nr_blocks = 0; @@ -218,6 +217,8 @@ lp_rast_triangle( struct lp_rasterizer *rast, } } + assert(rast->nr_blocks <= Elements(rast->blocks)); + /* Shade the 4x4 pixel blocks */ for (i = 0; i < rast->nr_blocks; i++) lp_rast_shade_quads(rast, &tri->inputs, -- cgit v1.2.3 From 5750a6426bc8d47f9801be5896b2d0f5ae3a5b12 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Dec 2009 14:55:48 -0700 Subject: llvmpipe: whitespace for readability --- src/gallium/drivers/llvmpipe/lp_tile_soa.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 0e874ce4515..660cc30c820 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -44,7 +44,7 @@ struct pipe_transfer; * Cache tile size (width and height). This needs to be a power of two. */ #define TILE_ORDER 6 -#define TILE_SIZE (1< Date: Wed, 2 Dec 2009 15:13:45 -0700 Subject: llvmpipe: execute shaders on 4x4 blocks instead of 8x2 This matches the convention used by the recursive rasterizer. Also fixed assorted typos, comments, etc. Now tri-z.c, gears.c, etc look basically right but there's still some cracks in triangle rasterization. 
--- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 135 ++++++++++++++++++--------- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 10 +- src/gallium/drivers/llvmpipe/lp_rast.c | 22 +++-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 12 ++- 4 files changed, 116 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 338dbca6d1e..affeeca6ff9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -45,6 +45,36 @@ #include "lp_bld_interp.h" +/* + * The shader JIT function operates on blocks of quads. + * Each block has 2x2 quads and each quad has 2x2 pixels. + * + * We iterate over the quads in order 0, 1, 2, 3: + * + * ################# + * # | # | # + * #---0---#---1---# + * # | # | # + * ################# + * # | # | # + * #---2---#---3---# + * # | # | # + * ################# + * + * Within each quad, we have four pixels which are represented in SOA + * order: + * + * ######### + * # 0 | 1 # + * #---+---# + * # 2 | 3 # + * ######### + * + * So the green channel (for example) of the four pixels is stored in + * a single vector register: {g0, g1, g2, g3}. + */ + + static void attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) { @@ -55,6 +85,10 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix } +/** + * Initialize the bld->a0, dadx, dady fields. This involves fetching + * those values from the arrays which are passed into the JIT function. 
+ */ static void coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef a0_ptr, @@ -91,7 +125,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, case TGSI_INTERPOLATE_CONSTANT: a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); a0 = lp_build_broadcast_scalar(&bld->base, a0); - attrib_name(a0, attrib, chan, ".dady"); + attrib_name(a0, attrib, chan, ".a0"); break; default: @@ -135,29 +169,12 @@ coeff_multiply(struct lp_build_interp_soa_context *bld, /** - * Multiply the dadx and dady with the xstep and ystep respectively. + * Emit LLVM code to compute the fragment shader input attribute values. + * For example, for a color input, we'll compute red, green, blue and alpha + * values for the four pixels in a quad. + * Recall that we're operating on 4-element vectors so each arithmetic + * operation is operating on the four pixels in a quad. */ -static void -coeffs_update(struct lp_build_interp_soa_context *bld) -{ - unsigned attrib; - unsigned chan; - - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - if (mode != TGSI_INTERPOLATE_CONSTANT) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep); - } - } - } - } -} - - static void attribs_init(struct lp_build_interp_soa_context *bld) { @@ -180,7 +197,9 @@ attribs_init(struct lp_build_interp_soa_context *bld) res = a0; if (mode != TGSI_INTERPOLATE_CONSTANT) { + /* res = res + x * dadx */ res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx)); + /* res = res + y * dady */ res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady)); } @@ -204,13 +223,19 @@ attribs_init(struct lp_build_interp_soa_context *bld) } +/** + * Increment the shader input attribute values. 
+ * This is called when we move from one quad to the next. + */ static void -attribs_update(struct lp_build_interp_soa_context *bld) +attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) { LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; + assert(quad_index < 4); + for(attrib = 0; attrib < bld->num_attribs; ++attrib) { unsigned mask = bld->mask[attrib]; unsigned mode = bld->mode[attrib]; @@ -224,13 +249,21 @@ attribs_update(struct lp_build_interp_soa_context *bld) res = bld->attribs_pre[attrib][chan]; - if(bld->xstep) + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad */ + /* build res = res + dadx + dadx */ res = lp_build_add(&bld->base, res, dadx); + res = lp_build_add(&bld->base, res, dadx); + } - if(bld->ystep) + if (quad_index == 2 || quad_index == 3) { + /* bottom-left or bottom-right quad */ + /* build res = res + dady + dady */ res = lp_build_add(&bld->base, res, dady); + res = lp_build_add(&bld->base, res, dady); + } - bld->attribs_pre[attrib][chan] = res; + //XXX bld->attribs_pre[attrib][chan] = res; if (mode == TGSI_INTERPOLATE_PERSPECTIVE) { LLVMValueRef w = bld->pos[3]; @@ -268,17 +301,32 @@ pos_init(struct lp_build_interp_soa_context *bld, } +/** + * Update quad position values when moving to the next quad. 
+ */ static void -pos_update(struct lp_build_interp_soa_context *bld) +pos_update(struct lp_build_interp_soa_context *bld, int quad_index) { LLVMValueRef x = bld->attribs[0][0]; LLVMValueRef y = bld->attribs[0][1]; + const int xstep = 2, ystep = 2; - if(bld->xstep) - x = lp_build_add(&bld->base, x, lp_build_const_scalar(bld->base.type, bld->xstep)); + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad in block */ + /* build x += xstep */ + x = lp_build_add(&bld->base, x, + lp_build_const_scalar(bld->base.type, xstep)); + } - if(bld->ystep) - y = lp_build_add(&bld->base, y, lp_build_const_scalar(bld->base.type, bld->ystep)); + if (quad_index == 2) { + /* bottom-left quad in block */ + /* build y += ystep */ + y = lp_build_add(&bld->base, y, + lp_build_const_scalar(bld->base.type, ystep)); + /* build x -= xstep */ + x = lp_build_sub(&bld->base, x, + lp_build_const_scalar(bld->base.type, xstep)); + } lp_build_name(x, "pos.x"); lp_build_name(y, "pos.y"); @@ -288,6 +336,9 @@ pos_update(struct lp_build_interp_soa_context *bld) } +/** + * Initialize fragment shader input attribute info. + */ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, @@ -297,9 +348,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, LLVMValueRef x0, - LLVMValueRef y0, - int xstep, - int ystep) + LLVMValueRef y0) { struct tgsi_parse_context parse; struct tgsi_full_declaration *decl; @@ -357,21 +406,19 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); attribs_init(bld); - - bld->xstep = xstep; - bld->ystep = ystep; - - coeffs_update(bld); } /** - * Advance the position and inputs with the xstep and ystep. + * Advance the position and inputs to the given quad within the block. 
*/ void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld) +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index) { - pos_update(bld); + assert(quad_index < 4); + + pos_update(bld, quad_index); - attribs_update(bld); + attribs_update(bld, quad_index); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 9c57a10879b..e2b3bc1bf0b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -63,9 +63,6 @@ struct lp_build_interp_soa_context LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - int xstep; - int ystep; - /* Attribute values before perspective divide */ LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; @@ -88,12 +85,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, LLVMValueRef x0, - LLVMValueRef y0, - int xstep, - int ystep); + LLVMValueRef y0); void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld); +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index); #endif /* LP_BLD_INTERP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 09495f6288e..f88dd4ae680 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -126,8 +126,6 @@ void lp_rast_end( struct lp_rasterizer *rast ) } - - /** * Begining rasterization of a tile. 
* \param x window X position of the tile, in pixels @@ -152,7 +150,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, { const uint8_t *clear_color = arg.clear_color; - RAST_DEBUG("%s %x,%x,%x,%x\n", __FUNCTION__, + RAST_DEBUG("%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, clear_color[0], clear_color[1], clear_color[2], @@ -181,7 +179,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, { unsigned i, j; - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) @@ -225,6 +223,9 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, } +/** + * Compute shading for a 4x4 block of pixels. + */ void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, @@ -237,6 +238,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, void *depth; uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; unsigned ix, iy; + int block_offset; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); @@ -275,16 +277,20 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? 
~0 : 0; #endif + assert((x % 2) == 0); + assert((y % 2) == 0); + ix = x % TILE_SIZE; iy = y % TILE_SIZE; + /* offset of the 4x4 pixel block within the tile */ + block_offset = ((iy/4)*(16*16) + (ix/4)*16); + /* color buffer */ - color = &TILE_PIXEL(tile->color, ix, iy, 0); + color = tile->color + 4 * block_offset; /* depth buffer */ - assert((x % 2) == 0); - assert((y % 2) == 0); - depth = tile->depth + (iy/4)*(16*16) + (ix/4)*16; + depth = tile->depth + block_offset; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(masks, 16)); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 0541d36580c..aa9c0066333 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -358,6 +358,9 @@ generate_blend(const struct pipe_blend_state *blend, /** * Generate the runtime callable function for the whole fragment pipeline. + * Note that the function which we generate operates on a block of 16 + * pixels at a time. The block contains 2x2 quads. Each quad contains + * 2x2 pixels. 
*/ static struct lp_fragment_shader_variant * generate_fragment(struct llvmpipe_context *lp, @@ -437,8 +440,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ - fs_type.length = 4; /* 4 element per vector */ - num_fs = 4; + fs_type.length = 4; /* 4 elements per vector */ + num_fs = 4; /* number of quads per block */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ @@ -509,18 +512,19 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, - x0, y0, 2, 0); + x0, y0); /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); + /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[NUM_CHANNELS]; LLVMValueRef depth_ptr_i; if(i != 0) - lp_build_interp_soa_update(&interp); + lp_build_interp_soa_update(&interp, i); fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), ""); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); -- cgit v1.2.3 From e01fa1eaec34675d0b30127de4f78b020a092a83 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 11:39:40 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_bld_logic.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index db22a8028a6..9470f834fc7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -41,6 +41,10 @@ #include "lp_bld_logic.h" +/** + * Build code to compare two values 'a' and 'b' using the given func. 
+ * \param func one of PIPE_FUNC_x + */ LLVMValueRef lp_build_cmp(struct lp_build_context *bld, unsigned func, @@ -56,6 +60,9 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef res; unsigned i; + assert(func >= PIPE_FUNC_NEVER); + assert(func <= PIPE_FUNC_ALWAYS); + if(func == PIPE_FUNC_NEVER) return zeros; if(func == PIPE_FUNC_ALWAYS) @@ -68,6 +75,7 @@ lp_build_cmp(struct lp_build_context *bld, #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating && util_cpu_caps.has_sse) { + /* float[4] comparison */ LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -117,6 +125,7 @@ lp_build_cmp(struct lp_build_context *bld, return res; } else if(util_cpu_caps.has_sse2) { + /* int[4] comparison */ static const struct { unsigned swap:1; unsigned eq:1; -- cgit v1.2.3 From 69fe4281ea19e29d534c74d65789494f7be4d4e3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 11:40:49 -0700 Subject: llvmpipe: additional comment about float->uint conversion --- src/gallium/drivers/llvmpipe/lp_bld_conv.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index 20c8710214b..1df938529c4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -124,6 +124,10 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); /* TODO: Fill in the empty lower bits for additional precision? */ + /* YES: this fixes progs/trivial/tri-z-eq.c. + * Otherwise vertex Z=1.0 values get converted to something like + * 0xfffffb00 and the test for equality with 0xffffffff fails. 
+ */ #if 0 { LLVMValueRef msb; -- cgit v1.2.3 From 51663f0506ed2534e57b798cdfaf8a0d376eb7a2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 11:41:45 -0700 Subject: llvmpipe: quick & dirty implementation of lp_rast_store_zstencil() This allows us to do a glReadPixels(GL_DEPTH_COMPONENT) to see what's in the depth buffer to help debugging. --- src/gallium/drivers/llvmpipe/lp_rast.c | 53 ++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index f88dd4ae680..c8359f45a2e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -95,6 +95,23 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, return FALSE; } + if (zsbuf) { + rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, + zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, width, height); + if (!rast->zsbuf_transfer) + return FALSE; + + rast->zsbuf_map = screen->transfer_map(rast->screen, + rast->zsbuf_transfer); + if (!rast->zsbuf_map) + return FALSE; + } + return TRUE; } @@ -117,7 +134,7 @@ void lp_rast_end( struct lp_rasterizer *rast ) screen->tex_transfer_destroy(rast->cbuf_transfer); if (rast->zsbuf_transfer) - screen->tex_transfer_destroy(rast->cbuf_transfer); + screen->tex_transfer_destroy(rast->zsbuf_transfer); rast->cbuf_transfer = NULL; rast->zsbuf_transfer = NULL; @@ -359,14 +376,44 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) } +static void +lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, + unsigned x0, unsigned y0, unsigned w, unsigned h) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < h; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*4); + for (x = 0; x < w; ++x) { + *dst_pixel++ = *src++; + } + dst_row += dst_stride; + } +} + /** * Write the rasterizer's 
z/stencil tile to the framebuffer. */ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) { - RAST_DEBUG("%s\n", __FUNCTION__); + const unsigned x = rast->x; + const unsigned y = rast->y; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + if (x + w > rast->width) + w -= x + w - rast->width; + + if (y + h > rast->height) + h -= y + h - rast->height; + + RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); - /* FIXME: call u_tile func to store depth/stencil to surface */ + assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM); + lp_tile_write_z32(rast->tile.depth, + rast->zsbuf_map, + rast->zsbuf_transfer->stride, + x, y, w, h); } -- cgit v1.2.3 From 51410a254c96779990995a2183eb742968df09e6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:13:22 -0700 Subject: llvmpipe: fix blend debug strings --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index aa9c0066333..c0d5a70a553 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -416,11 +416,11 @@ generate_fragment(struct llvmpipe_context *lp, } else if(key->blend.blend_enable) { debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + debug_printf("blend.rgb_src_factor = %s\n", 
debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); + debug_printf("blend.rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); + debug_printf("blend.alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); + debug_printf("blend.alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); + debug_printf("blend.alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); } -- cgit v1.2.3 From 3094fc200920f9d5eb62136d3b25896229fb0dbf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:25:08 -0700 Subject: llvmpipe: more debug info --- src/gallium/drivers/llvmpipe/lp_setup.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 70912323501..14b40dfe36d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -222,6 +222,8 @@ rasterize_bins( struct setup_context *setup, lp_rast_end( rast ); reset_context( setup ); + + SETUP_DEBUG("%s done \n", __FUNCTION__); } @@ -273,6 +275,8 @@ begin_binning( struct setup_context *setup ) else bin_everywhere( setup, lp_rast_load_zstencil, lp_rast_arg_null() ); } + + SETUP_DEBUG("%s done\n", __FUNCTION__); } @@ -422,7 +426,7 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const struct lp_shader_input *input, unsigned nr ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + SETUP_DEBUG("%s %p %u\n", __FUNCTION__, (void *) input, nr); memcpy( setup->fs.input, input, nr * sizeof input[0] ); setup->fs.nr_inputs = nr; @@ -432,7 +436,7 @@ void lp_setup_set_fs( struct setup_context *setup, struct lp_fragment_shader *fs ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + SETUP_DEBUG("%s %p\n", __FUNCTION__, (void *) fs); /* FIXME: reference count */ setup->fs.current.jit_function = fs ? 
fs->current->jit_function : NULL; @@ -442,7 +446,7 @@ void lp_setup_set_fs_constants(struct setup_context *setup, struct pipe_buffer *buffer) { - SETUP_DEBUG("%s\n", __FUNCTION__); + SETUP_DEBUG("%s %p\n", __FUNCTION__, (void *) buffer); pipe_buffer_reference(&setup->constants.current, buffer); @@ -454,7 +458,7 @@ void lp_setup_set_alpha_ref_value( struct setup_context *setup, float alpha_ref_value ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + SETUP_DEBUG("%s %f\n", __FUNCTION__, alpha_ref_value); if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; -- cgit v1.2.3 From 0e042bed49c51fef38b02b7cc05efa504f2f703d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:25:46 -0700 Subject: llvmpipe: set LP_SETUP_NEW_FS in lp_setup_set_fs() Fixes progs/trivial/tri-blend.c, but I think we're just getting lucky in this case. --- src/gallium/drivers/llvmpipe/lp_setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 14b40dfe36d..142fec4f800 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -440,6 +440,7 @@ lp_setup_set_fs( struct setup_context *setup, /* FIXME: reference count */ setup->fs.current.jit_function = fs ? 
fs->current->jit_function : NULL; + setup->dirty |= LP_SETUP_NEW_FS; } void -- cgit v1.2.3 From 29207a2ae6d3e6bae05621cb924c4a1940ce57fc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:57:13 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_setup_context.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 3209e41c01f..a3ec82e3825 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -103,8 +103,8 @@ struct setup_context { struct { unsigned flags; - union lp_rast_cmd_arg color; - union lp_rast_cmd_arg zstencil; + union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */ + union lp_rast_cmd_arg zstencil; /**< lp_rast_clear_zstencil() cmd */ } clear; enum { @@ -117,8 +117,8 @@ struct setup_context { struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; - const struct lp_rast_state *stored; - struct lp_rast_state current; + const struct lp_rast_state *stored; /**< what's in the bins */ + struct lp_rast_state current; /**< currently set state */ } fs; struct { -- cgit v1.2.3 From ffd0759973165368ac8ce07d9bcffeb0acf88e6f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 14:57:44 -0700 Subject: llvmpipe: comments and a stub for lp_rast_set_state() --- src/gallium/drivers/llvmpipe/lp_rast.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index c8359f45a2e..2d319777eef 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -161,6 +161,7 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, /** * Clear the rasterizer's current color tile. + * This is a bin command called during bin processing. 
*/ void lp_rast_clear_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) @@ -190,6 +191,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, /** * Clear the rasterizer's current z/stencil tile. + * This is a bin command called during bin processing. */ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) @@ -204,6 +206,10 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, } +/** + * Load tile color from the framebuffer surface. + * This is a bin command called during bin processing. + */ void lp_rast_load_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) { @@ -212,6 +218,11 @@ void lp_rast_load_color( struct lp_rasterizer *rast, /* call u_tile func to load colors from surface */ } + +/** + * Load tile z/stencil from the framebuffer surface. + * This is a bin command called during bin processing. + */ void lp_rast_load_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { @@ -220,9 +231,25 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, /* call u_tile func to load depth (and stencil?) from surface */ } + +void lp_rast_set_state( struct lp_rasterizer *rast, + const union lp_rast_cmd_arg arg ) +{ + RAST_DEBUG("%s\n", __FUNCTION__); + + /* XXX to do */ +} + + + /* Within a tile: */ +/** + * Run the shader on all blocks in a tile. This is used when a tile is + * completely contained inside a triangle. + * This is a bin command called during bin processing. + */ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { @@ -242,6 +269,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, /** * Compute shading for a 4x4 block of pixels. + * This is a bin command called during bin processing. 
*/ void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, -- cgit v1.2.3 From e2f46344560f8f1193b311ad41883011e67eea00 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 16:05:12 -0700 Subject: llvmpipe: checkpoint some initial state binning code --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 ++ src/gallium/drivers/llvmpipe/lp_setup.c | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2d319777eef..d5fe6e93699 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -235,6 +235,8 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_set_state( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { + const struct lp_rast_state *state = arg.set_state; + RAST_DEBUG("%s\n", __FUNCTION__); /* XXX to do */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 142fec4f800..36bd0ad4ddc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -594,14 +594,24 @@ lp_setup_update_shader_state( struct setup_context *setup ) memcmp(setup->fs.stored, &setup->fs.current, sizeof setup->fs.current) != 0) { - struct lp_rast_state *stored; - - stored = get_data(&setup->data, sizeof *stored); + /* The fs state that's been stored in the bins is different from + * the new, current state. So allocate a new lp_rast_state object + * and append it to the bin's setup data buffer. 
+ */ + struct lp_rast_state *stored = + (struct lp_rast_state *) get_data(&setup->data, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, sizeof setup->fs.current); setup->fs.stored = stored; + +#if 0 + /* put the state-set command into all bins */ + bin_everywhere( setup, + lp_rast_set_state, + *setup->fs.stored ); +#endif } } } -- cgit v1.2.3 From a9be9cd8be0fac4be5d65430749666d2204b79a4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 16:23:52 -0700 Subject: llvmpipe: comments, clean-ups in lp_rast_priv.h --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 723bb5ad695..4c0dfe2282d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -33,8 +33,10 @@ struct pipe_transfer; struct pipe_screen; -/* We can choose whatever layout for the internal tile storage we - * prefer: + +/** + * A tile's color and depth memory. + * We can choose whatever layout for the internal tile storage we prefer. */ struct lp_rast_tile { @@ -44,21 +46,22 @@ struct lp_rast_tile }; -struct lp_rasterizer { +/** + * This is the state required while rasterizing a tile. + * The tile size is TILE_SIZE x TILE_SIZE pixels. 
+ */ +struct lp_rasterizer +{ + struct lp_rast_tile tile; /** Tile color/z/stencil memory */ - /* We can choose whatever layout for the internal tile storage we - * prefer: - */ - struct lp_rast_tile tile; + unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ + unsigned width, height; /**< Size of framebuffer, in pixels */ - unsigned x; - unsigned y; boolean clipped_tile; - boolean check_for_clipped_tiles; - unsigned width; - unsigned height; + /* Framebuffer stuff + */ struct pipe_screen *screen; struct pipe_transfer *cbuf_transfer; struct pipe_transfer *zsbuf_transfer; @@ -75,6 +78,8 @@ struct lp_rasterizer { char clear_stencil; } state; + /* Pixel blocks produced during rasterization + */ unsigned nr_blocks; struct { unsigned x; -- cgit v1.2.3 From 4e058f6c4803be5d9d676338d6aee2775b88b87c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:00:22 -0700 Subject: llvmpipe: fix incorrect array indexing when saving blend color --- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 36bd0ad4ddc..8ef764eb80a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -543,7 +543,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) for (i = 0; i < 4; ++i) { uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); for (j = 0; j < 16; ++j) - stored[i*4 + j] = c; + stored[i*16 + j] = c; } setup->blend_color.stored = stored; -- cgit v1.2.3 From 30c122a4c90b62ad3d6b7bacb61c04dd38a4b2df Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:27:10 -0700 Subject: llvmpipe: new comment in do_triangle_ccw() --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 
cf862554067..c21c465a75d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -376,6 +376,11 @@ do_triangle_ccw(struct setup_context *setup, } } + /* + * All fields of 'tri' are now set. The remaining code here is + * concerned with binning. + */ + /* Convert to tile coordinates: */ minx = tri->minx / TILE_SIZE; -- cgit v1.2.3 From f492edee98e75bb4bace5469bfcdbff1a33609b1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:27:37 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_setup_context.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a3ec82e3825..b502f00eea4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -77,11 +77,19 @@ struct data_block_list { }; +/** + * Point/line/triangle setup context. + * Note: "stored" below indicates data which is stored in the bins, + * not arbitrary malloc'd memory. + */ struct setup_context { struct lp_rasterizer *rast; - /* When there are multiple threads, will want to double-buffer the + /** + * Per-bin data goes into the 'tile' cmd_block_lists. + * Shared bin data goes into the 'data' buffer. 
+ * When there are multiple threads, will want to double-buffer the * bin arrays: */ struct cmd_block_list tile[TILES_X][TILES_Y]; @@ -121,6 +129,7 @@ struct setup_context { struct lp_rast_state current; /**< currently set state */ } fs; + /** fragment shader constants */ struct { struct pipe_buffer *current; unsigned stored_size; @@ -132,7 +141,7 @@ struct setup_context { uint8_t *stored; } blend_color; - unsigned dirty; + unsigned dirty; /**< bitmask of LP_SETUP_x bits */ void (*point)( struct setup_context *, const float (*v0)[4]); @@ -163,7 +172,6 @@ void lp_setup_new_cmd_block( struct cmd_block_list *list ); static INLINE void *get_data( struct data_block_list *list, unsigned size) { - if (list->tail->used + size > DATA_BLOCK_SIZE) { lp_setup_new_data_block( list ); } @@ -189,7 +197,6 @@ static INLINE void *get_data_aligned( struct data_block_list *list, unsigned size, unsigned alignment ) { - if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { lp_setup_new_data_block( list ); } -- cgit v1.2.3 From 9c1debe208d07b57e88c65bae186bb339de7dee7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:28:02 -0700 Subject: llvmpipe: comment about blend color --- src/gallium/drivers/llvmpipe/lp_setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 8ef764eb80a..1f303d7705e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -540,6 +540,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) stored = get_data_aligned(&setup->data, 4 * 16, 16); + /* smear each blend color component across 16 ubyte elements */ for (i = 0; i < 4; ++i) { uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); for (j = 0; j < 16; ++j) -- cgit v1.2.3 From c6057ab8ff8991eac9c73c267696b386f8e56c68 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Dec 2009 17:28:48 -0700 Subject: llvmpipe: 
comments regarding lp_rast_triangle --- src/gallium/drivers/llvmpipe/lp_rast.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index a50b73b27f6..ab21a778345 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -75,8 +75,11 @@ struct lp_rast_shader_inputs { }; -/* Rasterization information for a triangle known to be in this bin, +/** + * Rasterization information for a triangle known to be in this bin, * plus inputs to run the shader: + * These fields are tile- and bin-independent. + * Objects of this type are put into the setup_context::data buffer. */ struct lp_rast_triangle { /* bounding box of tri (in pixels) */ -- cgit v1.2.3 From 04e12e31b252e7a18862c3ac386f302665edb6e7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 09:14:40 -0700 Subject: llvmpipe: dynamic allocation of triangle a0/dadx/dady arrays Much less memory per triangle now. --- src/gallium/drivers/llvmpipe/lp_rast.h | 10 ++++------ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 10 ++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index ab21a778345..435993d44d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -64,14 +64,12 @@ struct lp_rast_shader_inputs { const struct lp_rast_state *state; /* Attribute interpolation: - * * First coefficient is position. - * - * FIXME: reduce memory waste! + * These pointers point into the bin data buffer. 
*/ - float a0[1 + PIPE_MAX_SHADER_INPUTS][4]; - float dadx[1 + PIPE_MAX_SHADER_INPUTS][4]; - float dady[1 + PIPE_MAX_SHADER_INPUTS][4]; + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index c21c465a75d..3b71bc4c034 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -177,6 +177,16 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned slot; + /* Allocate space for the a0, dadx and dady arrays + */ + { + unsigned bytes; + bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); + tri->inputs.a0 = get_data_aligned( &setup->data, bytes, 16 ); + tri->inputs.dadx = get_data_aligned( &setup->data, bytes, 16 ); + tri->inputs.dady = get_data_aligned( &setup->data, bytes, 16 ); + } + /* The internal position input is in slot zero: */ setup_fragcoord_coef(tri, 0, v1, v2, v3); -- cgit v1.2.3 From 1796ffd3bcf74a94c800717e77abaf9902c50b4d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 09:19:09 -0700 Subject: llvmpipe: fix typo, whitespace --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 3b71bc4c034..74ed0a9e8fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -117,7 +117,6 @@ static void perspective_coef( struct lp_rast_triangle *tri, float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; tri->inputs.a0[slot][i] = (a1 - @@ -191,7 +190,7 @@ static void setup_tri_coefficients( struct setup_context *setup, */ setup_fragcoord_coef(tri, 0, v1, v2, v3); - /* setup interpolation for 
all the remaining attrbutes: + /* setup interpolation for all the remaining attributes: */ for (slot = 0; slot < setup->fs.nr_inputs; slot++) { unsigned vert_attr = setup->fs.input[slot].src_index; -- cgit v1.2.3 From 9dca0100489c7a7c02af77da42a39dbe1560d7e2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 10:41:38 -0700 Subject: llvmpipe: struct cmd_bin Just introducing a new structure to represent a per-tile bin. --- src/gallium/drivers/llvmpipe/lp_setup.c | 11 ++++++----- src/gallium/drivers/llvmpipe/lp_setup_context.h | 19 +++++++++++++++---- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 1f303d7705e..fc7f4f6778e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -111,7 +111,7 @@ static void reset_context( struct setup_context *setup ) */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - struct cmd_block_list *list = &setup->tile[i][j]; + struct cmd_block_list *list = &setup->tile[i][j].commands; struct cmd_block *block; struct cmd_block *tmp; @@ -173,9 +173,10 @@ static void bin_everywhere( struct setup_context *setup, /** Rasterize commands for a single bin */ static void rasterize_bin( struct lp_rasterizer *rast, - struct cmd_block_list *commands, + const struct cmd_bin *bin, int x, int y) { + const struct cmd_block_list *commands = &bin->commands; struct cmd_block *block; unsigned k; @@ -666,7 +667,7 @@ lp_setup_destroy( struct setup_context *setup ) for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) - FREE(setup->tile[i][j].head); + FREE(setup->tile[i][j].commands.head); FREE(setup->data.head); @@ -691,8 +692,8 @@ lp_setup_create( struct pipe_screen *screen ) for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) - setup->tile[i][j].head = - setup->tile[i][j].tail = CALLOC_STRUCT(cmd_block); + setup->tile[i][j].commands.head = 
+ setup->tile[i][j].commands.tail = CALLOC_STRUCT(cmd_block); setup->data.head = setup->data.tail = CALLOC_STRUCT(data_block); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index b502f00eea4..1715048f760 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -71,11 +71,20 @@ struct cmd_block_list { struct cmd_block *tail; }; +/** + * For each screen tile we have one of these bins. + */ +struct cmd_bin { + struct cmd_block_list commands; + struct lp_rast_state *curr_state; +}; + + struct data_block_list { struct data_block *head; struct data_block *tail; }; - + /** * Point/line/triangle setup context. @@ -87,12 +96,12 @@ struct setup_context { struct lp_rasterizer *rast; /** - * Per-bin data goes into the 'tile' cmd_block_lists. + * Per-bin data goes into the 'tile' bins. * Shared bin data goes into the 'data' buffer. * When there are multiple threads, will want to double-buffer the * bin arrays: */ - struct cmd_block_list tile[TILES_X][TILES_Y]; + struct cmd_bin tile[TILES_X][TILES_Y]; struct data_block_list data; /* size of framebuffer, in tiles */ @@ -212,10 +221,12 @@ static INLINE void *get_data_aligned( struct data_block_list *list, /* Add a command to a given bin. */ -static INLINE void bin_command( struct cmd_block_list *list, +static INLINE void bin_command( struct cmd_bin *bin, lp_rast_cmd cmd, union lp_rast_cmd_arg arg ) { + struct cmd_block_list *list = &bin->commands; + if (list->tail->count == CMD_BLOCK_MAX) { lp_setup_new_cmd_block( list ); } -- cgit v1.2.3 From b1659b9213f3eeee440590dfe379f0d193948307 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 11:50:40 -0700 Subject: llvmpipe: bin state-change commands Previously, each triangle had a pointer to the state to use for shading. Now we insert state-change commands into the bins. 
When we execute one of those commands we just update a 'current state' pointer and use that pointer when calling the jit shader. When inserting state-change commands into a bin we check if the previous command was also a state-change command and simply replace it. This avoids accumulating useless/redundant state-change commands. --- src/gallium/drivers/llvmpipe/lp_rast.c | 9 +- src/gallium/drivers/llvmpipe/lp_rast.h | 15 ++-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 + src/gallium/drivers/llvmpipe/lp_setup.c | 113 +++++++++++++++++------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 7 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 - 6 files changed, 101 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d5fe6e93699..8f37a28e875 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -237,9 +237,10 @@ void lp_rast_set_state( struct lp_rasterizer *rast, { const struct lp_rast_state *state = arg.set_state; - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s %p\n", __FUNCTION__, (void *) state); - /* XXX to do */ + /* just set the current state pointer for this rasterizer */ + rast->current_state = state; } @@ -279,7 +280,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned mask) { #if 1 - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = rast->current_state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; @@ -287,6 +288,8 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned ix, iy; int block_offset; + assert(state); + /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 435993d44d2..e9a1fa49add 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ 
-55,18 +55,13 @@ struct lp_rast_state { }; -/* Coefficients necessary to run the shader at a given location: + +/** + * Coefficients necessary to run the shader at a given location. + * First coefficient is position. + * These pointers point into the bin data buffer. */ struct lp_rast_shader_inputs { - - /* Current rasterizer state: - */ - const struct lp_rast_state *state; - - /* Attribute interpolation: - * First coefficient is position. - * These pointers point into the bin data buffer. - */ float (*a0)[4]; float (*dadx)[4]; float (*dady)[4]; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 4c0dfe2282d..98111edff72 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -86,6 +86,8 @@ struct lp_rasterizer unsigned y; unsigned mask; } blocks[256]; + + const struct lp_rast_state *current_state; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index fc7f4f6778e..11a9fd2637c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -155,6 +155,34 @@ static void reset_context( struct setup_context *setup ) } +/** + * Return last command in the bin + */ +static lp_rast_cmd +lp_get_last_command( const struct cmd_bin *bin ) +{ + const struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + if (i > 0) + return tail->cmd[i - 1]; + else + return NULL; +} + + +/** + * Replace the arg of the last command in the bin. + */ +static void +lp_replace_last_command_arg( struct cmd_bin *bin, + const union lp_rast_cmd_arg arg ) +{ + struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + assert(i > 0); + tail->arg[i - 1] = arg; +} + /* Add a command to all active bins. @@ -170,6 +198,32 @@ static void bin_everywhere( struct setup_context *setup, } +/** + * Put a state-change command into all bins. 
+ * If we find that the last command in a bin was also a state-change + * command, we can simply replace that one with the new one. + */ +static void +bin_state_command( struct setup_context *setup, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < setup->tiles_x; i++) { + for (j = 0; j < setup->tiles_y; j++) { + struct cmd_bin *bin = &setup->tile[i][j]; + lp_rast_cmd last_cmd = lp_get_last_command(bin); + if (last_cmd == cmd) { + lp_replace_last_command_arg(bin, arg); + } + else { + bin_command( bin, cmd, arg ); + } + } + } +} + + /** Rasterize commands for a single bin */ static void rasterize_bin( struct lp_rasterizer *rast, @@ -234,31 +288,6 @@ begin_binning( struct setup_context *setup ) { SETUP_DEBUG("%s\n", __FUNCTION__); - if (!setup->fb.cbuf && !setup->fb.zsbuf) { - setup->fb.width = 0; - setup->fb.height = 0; - } - else if (!setup->fb.zsbuf) { - setup->fb.width = setup->fb.cbuf->width; - setup->fb.height = setup->fb.cbuf->height; - } - else if (!setup->fb.cbuf) { - setup->fb.width = setup->fb.zsbuf->width; - setup->fb.height = setup->fb.zsbuf->height; - } - else { - /* XXX: not sure what we're really supposed to do for - * mis-matched color & depth buffer sizes. 
- */ - setup->fb.width = MIN2(setup->fb.cbuf->width, - setup->fb.zsbuf->width); - setup->fb.height = MIN2(setup->fb.cbuf->height, - setup->fb.zsbuf->height); - } - - setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; - setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; - if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, @@ -352,8 +381,34 @@ lp_setup_bind_framebuffer( struct setup_context *setup, pipe_surface_reference( &setup->fb.cbuf, color ); pipe_surface_reference( &setup->fb.zsbuf, zstencil ); + + if (!setup->fb.cbuf && !setup->fb.zsbuf) { + setup->fb.width = 0; + setup->fb.height = 0; + } + else if (!setup->fb.zsbuf) { + setup->fb.width = setup->fb.cbuf->width; + setup->fb.height = setup->fb.cbuf->height; + } + else if (!setup->fb.cbuf) { + setup->fb.width = setup->fb.zsbuf->width; + setup->fb.height = setup->fb.zsbuf->height; + } + else { + /* XXX: not sure what we're really supposed to do for + * mis-matched color & depth buffer sizes. 
+ */ + setup->fb.width = MIN2(setup->fb.cbuf->width, + setup->fb.zsbuf->width); + setup->fb.height = MIN2(setup->fb.cbuf->height, + setup->fb.zsbuf->height); + } + + setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; + setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; } + void lp_setup_clear( struct setup_context *setup, const float *color, @@ -608,12 +663,10 @@ lp_setup_update_shader_state( struct setup_context *setup ) sizeof setup->fs.current); setup->fs.stored = stored; -#if 0 /* put the state-set command into all bins */ - bin_everywhere( setup, - lp_rast_set_state, - *setup->fs.stored ); -#endif + bin_state_command( setup, + lp_rast_set_state, + lp_rast_arg_state(setup->fs.stored) ); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 1715048f760..7c7c34f3f76 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -76,10 +76,14 @@ struct cmd_block_list { */ struct cmd_bin { struct cmd_block_list commands; - struct lp_rast_state *curr_state; }; +/** + * This stores bulk data which is shared by all bins. + * Examples include triangle data and state data. The commands in + * the per-tile bins will point to chunks of data in this structure. 
+ */ struct data_block_list { struct data_block *head; struct data_block *tail; @@ -241,5 +245,4 @@ static INLINE void bin_command( struct cmd_bin *bin, } - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 74ed0a9e8fb..48733a599b7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -295,8 +295,6 @@ do_triangle_ccw(struct setup_context *setup, return; } - tri->inputs.state = setup->fs.stored; - /* */ tri->oneoverarea = ((float)FIXED_ONE) / (float)area; -- cgit v1.2.3 From 5c7d1b592ad9ce9e7ee36610f17d41e5c2881d54 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 11:58:24 -0700 Subject: llvmpipe: remove lp_rast_triangle::oneoverarea field Makes lp_rast_triangle a little smaller (now 280 bytes on a 32-bit system). --- src/gallium/drivers/llvmpipe/lp_rast.h | 5 ----- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 32 ++++++++++++++++------------- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e9a1fa49add..a119b089bd8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -108,11 +108,6 @@ struct lp_rast_triangle { int step[3][16]; - /* XXX: this is only used inside lp_setup_tri.c, don't really - * need it here: - */ - float oneoverarea; - /* inputs for the shader */ struct lp_rast_shader_inputs inputs; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 48733a599b7..56a32d0ac00 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -54,6 +54,7 @@ static void constant_coef( struct lp_rast_triangle *tri, * for a triangle. 
*/ static void linear_coef( struct lp_rast_triangle *tri, + float oneoverarea, unsigned slot, const float (*v1)[4], const float (*v2)[4], @@ -67,8 +68,8 @@ static void linear_coef( struct lp_rast_triangle *tri, float da12 = a1 - a2; float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; @@ -100,6 +101,7 @@ static void linear_coef( struct lp_rast_triangle *tri, * divide the interpolated value by the interpolated W at that fragment. */ static void perspective_coef( struct lp_rast_triangle *tri, + float oneoverarea, unsigned slot, const float (*v1)[4], const float (*v2)[4], @@ -114,8 +116,8 @@ static void perspective_coef( struct lp_rast_triangle *tri, float a3 = v3[vert_attr][i] * v3[0][3]; float da12 = a1 - a2; float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; @@ -133,6 +135,7 @@ static void perspective_coef( struct lp_rast_triangle *tri, */ static void setup_fragcoord_coef(struct lp_rast_triangle *tri, + float oneoverarea, unsigned slot, const float (*v1)[4], const float (*v2)[4], @@ -147,9 +150,9 @@ setup_fragcoord_coef(struct lp_rast_triangle *tri, tri->inputs.dadx[slot][1] = 0.0; tri->inputs.dady[slot][1] = 1.0; /*Z*/ - linear_coef(tri, slot, v1, v2, v3, 0, 2); + linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2); /*W*/ - linear_coef(tri, slot, v1, v2, v3, 0, 3); + linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3); } @@ -169,10 
+172,11 @@ static void setup_facing_coef( struct lp_rast_triangle *tri, */ static void setup_tri_coefficients( struct setup_context *setup, struct lp_rast_triangle *tri, + float oneoverarea, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], - boolean frontface ) + boolean frontface) { unsigned slot; @@ -188,7 +192,7 @@ static void setup_tri_coefficients( struct setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(tri, 0, v1, v2, v3); + setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); /* setup interpolation for all the remaining attributes: */ @@ -204,18 +208,18 @@ static void setup_tri_coefficients( struct setup_context *setup, case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i); + linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i); + perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_POSITION: /* XXX: fix me - duplicates the values in slot zero. 
*/ - setup_fragcoord_coef(tri, slot+1, v1, v2, v3); + setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3); break; case LP_INTERP_FACING: @@ -260,7 +264,7 @@ do_triangle_ccw(struct setup_context *setup, const int y3 = subpixel_snap(v3[0][1]); struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); - float area; + float area, oneoverarea; int minx, maxx, miny, maxy; tri->dx12 = x1 - x2; @@ -297,11 +301,11 @@ do_triangle_ccw(struct setup_context *setup, /* */ - tri->oneoverarea = ((float)FIXED_ONE) / (float)area; + oneoverarea = ((float)FIXED_ONE) / (float)area; /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing ); + setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing ); /* half-edge constants, will be interated over the whole * rendertarget. -- cgit v1.2.3 From d9dc3d59760a28d54013d3d164f61d85ec807651 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 12:54:37 -0700 Subject: llvmpipe: move bin-related structures and functions into new lp_bin.[ch] And put lp_ prefixes on some functions. 
--- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 7 +- src/gallium/drivers/llvmpipe/lp_bin.c | 51 ++++++++ src/gallium/drivers/llvmpipe/lp_bin.h | 167 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 5 + src/gallium/drivers/llvmpipe/lp_setup.c | 27 +--- src/gallium/drivers/llvmpipe/lp_setup_context.h | 116 +--------------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 18 +-- 8 files changed, 243 insertions(+), 149 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_bin.c create mode 100644 src/gallium/drivers/llvmpipe/lp_bin.h (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index bfe34396d95..0a5d1b9f1b0 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -6,6 +6,7 @@ LIBNAME = llvmpipe CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ + lp_bin.c \ lp_bld_alpha.c \ lp_bld_arit.c \ lp_bld_blend_aos.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 3530e739cc4..4aef3387353 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -19,6 +19,7 @@ env.CodeGenerate( llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ + 'lp_bin.c', 'lp_bld_alpha.c', 'lp_bld_arit.c', 'lp_bld_blend_aos.c', @@ -46,7 +47,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_flush.c', 'lp_jit.c', 'lp_prim_vbuf.c', - 'lp_query.c', + 'lp_query.c', 'lp_setup.c', 'lp_setup_tri.c', 'lp_setup_line.c', @@ -62,8 +63,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', - 'lp_rast.c', - 'lp_rast_tri.c', + 'lp_rast.c', + 'lp_rast_tri.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c new file mode 100644 index 00000000000..f43cdcbf3de --- /dev/null +++ 
b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_memory.h" +#include "lp_bin.h" + + +void +lp_bin_new_cmd_block( struct cmd_block_list *list ) +{ + struct cmd_block *block = MALLOC_STRUCT(cmd_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->count = 0; +} + + +void +lp_bin_new_data_block( struct data_block_list *list ) +{ + struct data_block *block = MALLOC_STRUCT(data_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->used = 0; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h new file mode 100644 index 00000000000..fa25d786311 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -0,0 +1,167 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Binner data structures and bin-related functions. + * Note: the "setup" code is concerned with building bins while + * The "rast" code is concerned with consuming/executing bins. + */ + +#ifndef LP_BIN_H +#define LP_BIN_H + +#include "lp_rast.h" + + +#define CMD_BLOCK_MAX 128 +#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) + + + +/* switch to a non-pointer value for this: + */ +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); + +struct cmd_block { + lp_rast_cmd cmd[CMD_BLOCK_MAX]; + union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; + unsigned count; + struct cmd_block *next; +}; + +struct data_block { + ubyte data[DATA_BLOCK_SIZE]; + unsigned used; + struct data_block *next; +}; + +struct cmd_block_list { + struct cmd_block *head; + struct cmd_block *tail; +}; + +/** + * For each screen tile we have one of these bins. + */ +struct cmd_bin { + struct cmd_block_list commands; +}; + + +/** + * This stores bulk data which is shared by all bins. + * Examples include triangle data and state data. The commands in + * the per-tile bins will point to chunks of data in this structure. + */ +struct data_block_list { + struct data_block *head; + struct data_block *tail; +}; + + + +extern void lp_bin_new_data_block( struct data_block_list *list ); + +extern void lp_bin_new_cmd_block( struct cmd_block_list *list ); + + +/** + * Allocate space for a command/data in the given block list. + * Grow the block list if needed. 
+ */ +static INLINE void * +lp_bin_alloc( struct data_block_list *list, unsigned size) +{ + if (list->tail->used + size > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + tail->used += size; + return data; + } +} + + +/** + * As above, but with specific alignment. + */ +static INLINE void * +lp_bin_alloc_aligned( struct data_block_list *list, unsigned size, + unsigned alignment ) +{ + if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; + tail->used += offset + size; + return data + offset; + } +} + + +/* Put back data if we decide not to use it, eg. culled triangles. + */ +static INLINE void +lp_bin_putback_data( struct data_block_list *list, unsigned size) +{ + assert(list->tail->used >= size); + list->tail->used -= size; +} + + +/* Add a command to a given bin. 
+ */ +static INLINE void +lp_bin_command( struct cmd_bin *bin, + lp_rast_cmd cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_block_list *list = &bin->commands; + + if (list->tail->count == CMD_BLOCK_MAX) { + lp_bin_new_cmd_block( list ); + } + + { + struct cmd_block *tail = list->tail; + unsigned i = tail->count; + tail->cmd[i] = cmd; + tail->arg[i] = arg; + tail->count++; + } +} + + +#endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index a119b089bd8..307c45cb9fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -43,6 +43,11 @@ struct pipe_screen; #define FIXED_ONE (1<tail->next = block; - list->tail = block; - block->next = NULL; - block->count = 0; -} - -void lp_setup_new_data_block( struct data_block_list *list ) -{ - struct data_block *block = MALLOC_STRUCT(data_block); - list->tail->next = block; - list->tail = block; - block->next = NULL; - block->used = 0; -} static void @@ -194,7 +177,7 @@ static void bin_everywhere( struct setup_context *setup, unsigned i, j; for (i = 0; i < setup->tiles_x; i++) for (j = 0; j < setup->tiles_y; j++) - bin_command( &setup->tile[i][j], cmd, arg ); + lp_bin_command( &setup->tile[i][j], cmd, arg ); } @@ -217,7 +200,7 @@ bin_state_command( struct setup_context *setup, lp_replace_last_command_arg(bin, arg); } else { - bin_command( bin, cmd, arg ); + lp_bin_command( bin, cmd, arg ); } } } @@ -594,7 +577,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) uint8_t *stored; unsigned i, j; - stored = get_data_aligned(&setup->data, 4 * 16, 16); + stored = lp_bin_alloc_aligned(&setup->data, 4 * 16, 16); /* smear each blend color component across 16 ubyte elements */ for (i = 0; i < 4; ++i) { @@ -626,7 +609,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) current_size) != 0) { void *stored; - stored = get_data(&setup->data, current_size); + stored = lp_bin_alloc(&setup->data, current_size); 
if(stored) { memcpy(stored, current_data, @@ -656,7 +639,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) * and append it to the bin's setup data buffer. */ struct lp_rast_state *stored = - (struct lp_rast_state *) get_data(&setup->data, sizeof *stored); + (struct lp_rast_state *) lp_bin_alloc(&setup->data, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 7c7c34f3f76..5abe66f5862 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -31,6 +31,7 @@ #include "lp_setup.h" #include "lp_rast.h" #include "lp_tile_soa.h" /* for TILE_SIZE */ +#include "lp_bin.h" /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -40,56 +41,12 @@ #define TILES_X (MAXWIDTH / TILE_SIZE) #define TILES_Y (MAXHEIGHT / TILE_SIZE) -#define CMD_BLOCK_MAX 128 -#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) - #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 #define LP_SETUP_NEW_BLEND_COLOR 0x04 -/* switch to a non-pointer value for this: - */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); - -struct cmd_block { - lp_rast_cmd cmd[CMD_BLOCK_MAX]; - union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; - unsigned count; - struct cmd_block *next; -}; - -struct data_block { - ubyte data[DATA_BLOCK_SIZE]; - unsigned used; - struct data_block *next; -}; - -struct cmd_block_list { - struct cmd_block *head; - struct cmd_block *tail; -}; - -/** - * For each screen tile we have one of these bins. - */ -struct cmd_bin { - struct cmd_block_list commands; -}; - - -/** - * This stores bulk data which is shared by all bins. - * Examples include triangle data and state data. The commands in - * the per-tile bins will point to chunks of data in this structure. 
- */ -struct data_block_list { - struct data_block *head; - struct data_block *tail; -}; - - /** * Point/line/triangle setup context. * Note: "stored" below indicates data which is stored in the bins, @@ -174,75 +131,4 @@ void lp_setup_choose_line( struct setup_context *setup ); void lp_setup_choose_point( struct setup_context *setup ); -void lp_setup_new_data_block( struct data_block_list *list ); -void lp_setup_new_cmd_block( struct cmd_block_list *list ); - - -/** - * Allocate space for a command/data in the given block list. - * Grow the block list if needed. - */ -static INLINE void *get_data( struct data_block_list *list, - unsigned size) -{ - if (list->tail->used + size > DATA_BLOCK_SIZE) { - lp_setup_new_data_block( list ); - } - - { - struct data_block *tail = list->tail; - ubyte *data = tail->data + tail->used; - tail->used += size; - return data; - } -} - -/* Put back data if we decide not to use it, eg. culled triangles. - */ -static INLINE void putback_data( struct data_block_list *list, - unsigned size) -{ - list->tail->used -= size; -} - - -static INLINE void *get_data_aligned( struct data_block_list *list, - unsigned size, - unsigned alignment ) -{ - if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { - lp_setup_new_data_block( list ); - } - - { - struct data_block *tail = list->tail; - ubyte *data = tail->data + tail->used; - unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; - tail->used += offset + size; - return data + offset; - } -} - -/* Add a command to a given bin. 
- */ -static INLINE void bin_command( struct cmd_bin *bin, - lp_rast_cmd cmd, - union lp_rast_cmd_arg arg ) -{ - struct cmd_block_list *list = &bin->commands; - - if (list->tail->count == CMD_BLOCK_MAX) { - lp_setup_new_cmd_block( list ); - } - - { - struct cmd_block *tail = list->tail; - unsigned i = tail->count; - tail->cmd[i] = cmd; - tail->arg[i] = arg; - tail->count++; - } -} - - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 56a32d0ac00..5e53b4050e8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -185,9 +185,9 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned bytes; bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = get_data_aligned( &setup->data, bytes, 16 ); - tri->inputs.dadx = get_data_aligned( &setup->data, bytes, 16 ); - tri->inputs.dady = get_data_aligned( &setup->data, bytes, 16 ); + tri->inputs.a0 = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); + tri->inputs.dadx = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); + tri->inputs.dady = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); } /* The internal position input is in slot zero: @@ -263,7 +263,7 @@ do_triangle_ccw(struct setup_context *setup, const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); + struct lp_rast_triangle *tri = lp_bin_alloc( &setup->data, sizeof *tri ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -283,7 +283,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject to overflow?? 
*/ if (area <= 0) { - putback_data( &setup->data, sizeof *tri ); + lp_bin_putback_data( &setup->data, sizeof *tri ); return; } @@ -295,7 +295,7 @@ do_triangle_ccw(struct setup_context *setup, if (tri->miny == tri->maxy || tri->minx == tri->maxx) { - putback_data( &setup->data, sizeof *tri ); + lp_bin_putback_data( &setup->data, sizeof *tri ); return; } @@ -405,7 +405,7 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - bin_command( &setup->tile[minx][miny], lp_rast_triangle, + lp_bin_command( &setup->tile[minx][miny], lp_rast_triangle, lp_rast_arg_triangle(tri) ); } else @@ -464,7 +464,7 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ - bin_command( &setup->tile[x][y], + lp_bin_command( &setup->tile[x][y], lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); } @@ -472,7 +472,7 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* shade partial tile */ - bin_command( &setup->tile[x][y], + lp_bin_command( &setup->tile[x][y], lp_rast_triangle, lp_rast_arg_triangle(tri) ); } -- cgit v1.2.3 From 49a720c5cdfb500c323ae2411b39f4609d14f021 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 12:59:34 -0700 Subject: llvmpipe: comments and minor clean-ups --- src/gallium/drivers/llvmpipe/lp_rast.h | 27 +++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 7 +++++++ 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 307c45cb9fc..21bbf104b15 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -25,12 +25,22 @@ * **************************************************************************/ +/** + * The rast code is concerned with rasterization of command bins. + * Each screen tile has a bin associated with it. 
To render the + * scene we iterate over the tile bins and execute the commands + * in each bin. + * We'll do that with multiple threads... + */ + + #ifndef LP_RAST_H #define LP_RAST_H #include "pipe/p_compiler.h" #include "lp_jit.h" + /* Initially create and program a single rasterizer directly. Later * will want multiple of these, one or two per core. At that stage * will probably pass command buffers into the rasterizers rather than @@ -57,7 +67,6 @@ struct lp_rast_state { * the tile color/z/stencil data somehow: */ lp_jit_frag_func jit_function; - }; @@ -121,6 +130,9 @@ struct lp_rast_triangle { struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ); +void lp_rast_destroy( struct lp_rasterizer * ); + + boolean lp_rast_begin( struct lp_rasterizer *rast, struct pipe_surface *cbuf, struct pipe_surface *zsbuf, @@ -147,6 +159,7 @@ union lp_rast_cmd_arg { unsigned clear_zstencil; }; + /* Cast wrappers. Hopefully these compile to noops! */ static INLINE const union lp_rast_cmd_arg @@ -183,10 +196,12 @@ lp_rast_arg_null( void ) - - -/* Binnable Commands: +/** + * Binnable Commands. + * These get put into bins by the setup code and are called when + * the bins are executed. */ + void lp_rast_clear_color( struct lp_rasterizer *, const union lp_rast_cmd_arg ); @@ -214,9 +229,5 @@ void lp_rast_shade_tile( struct lp_rasterizer *, void lp_rast_end_tile( struct lp_rasterizer *rast ); -/* Shutdown: - */ -void lp_rast_destroy( struct lp_rasterizer * ); - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 5abe66f5862..180b8f6e880 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -25,6 +25,13 @@ * **************************************************************************/ + +/** + * The setup code is concerned with point/line/triangle setup and + * putting commands/data into the bins. 
+ */ + + #ifndef LP_SETUP_CONTEXT_H #define LP_SETUP_CONTEXT_H -- cgit v1.2.3 From 24a3b0d23a93378d77198f1c92f6f381c0ad05b8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 13:01:03 -0700 Subject: llvmpipe: add missing sources to Makefile --- src/gallium/drivers/llvmpipe/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 0a5d1b9f1b0..6ff45d0f058 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -37,6 +37,8 @@ C_SOURCES = \ lp_rast.c \ lp_rast_tri.c \ lp_setup.c \ + lp_setup_line.c \ + lp_setup_point.c \ lp_setup_tri.c \ lp_query.c \ lp_screen.c \ -- cgit v1.2.3 From 15a2a588d88ae02e575ff1ef9287c789ebdadead Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 13:53:28 -0700 Subject: llvmpipe: fix-up polygon culling/winding --- src/gallium/drivers/llvmpipe/lp_state_rasterizer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 4561c6b8456..282ed2e9ea3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -29,6 +29,7 @@ #include "util/u_memory.h" #include "lp_context.h" #include "lp_state.h" +#include "lp_setup.h" #include "draw/draw_context.h" @@ -50,6 +51,16 @@ void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + /* Note: we can immediately set the triangle state here and + * not worry about binning because we handle culling during + * triangle setup, not when rasterizing the bins. 
+ */ + if (llvmpipe->rasterizer) { + lp_setup_set_triangle_state( llvmpipe->setup, + llvmpipe->rasterizer->cull_mode, + llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW ); + } + llvmpipe->dirty |= LP_NEW_RASTERIZER; } -- cgit v1.2.3 From a08d6302168341001003da32d42cfcff2311fa04 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 14:11:25 -0700 Subject: llvmpipe: use LP_DBG() macro everywhere --- src/gallium/drivers/llvmpipe/lp_rast.c | 24 +++++++++--------- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 3 ++- src/gallium/drivers/llvmpipe/lp_setup.c | 40 +++++++++++++++--------------- 3 files changed, 34 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 8f37a28e875..5891a2a706d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -27,13 +27,13 @@ #include "util/u_memory.h" +#include "lp_debug.h" #include "lp_state.h" #include "lp_rast.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" #include "lp_bld_debug.h" -#define RAST_DEBUG debug_printf struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) { @@ -65,7 +65,7 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, { struct pipe_screen *screen = rast->screen; - RAST_DEBUG("%s %dx%d\n", __FUNCTION__, width, height); + LP_DBG(DEBUG_RAST, "%s %dx%d\n", __FUNCTION__, width, height); pipe_surface_reference(&rast->state.cbuf, cbuf); pipe_surface_reference(&rast->state.zsbuf, zsbuf); @@ -152,7 +152,7 @@ void lp_rast_start_tile( struct lp_rasterizer *rast, unsigned x, unsigned y ) { - RAST_DEBUG("%s %d,%d\n", __FUNCTION__, x, y); + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); rast->x = x; rast->y = y; @@ -168,7 +168,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, { const uint8_t *clear_color = arg.clear_color; - RAST_DEBUG("%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, + LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, 
clear_color[0], clear_color[1], clear_color[2], @@ -198,7 +198,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, { unsigned i, j; - RAST_DEBUG("%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); + LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) @@ -213,7 +213,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, void lp_rast_load_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg) { - RAST_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); /* call u_tile func to load colors from surface */ } @@ -226,7 +226,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast, void lp_rast_load_zstencil( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { - RAST_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); /* call u_tile func to load depth (and stencil?) from surface */ } @@ -237,7 +237,7 @@ void lp_rast_set_state( struct lp_rasterizer *rast, { const struct lp_rast_state *state = arg.set_state; - RAST_DEBUG("%s %p\n", __FUNCTION__, (void *) state); + LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); /* just set the current state pointer for this rasterizer */ rast->current_state = state; @@ -260,7 +260,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const unsigned mask = ~0; unsigned x, y; - RAST_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); /* Use the existing preference for 4x4 (four quads) shading: */ @@ -398,7 +398,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) if (y + h > rast->height) h -= y + h - rast->height; - RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); lp_tile_write_4ub(rast->cbuf_transfer->format, rast->tile.color, @@ -440,7 +440,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) if (y + h > rast->height) h -= y + h - 
rast->height; - RAST_DEBUG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM); lp_tile_write_z32(rast->tile.depth, @@ -455,7 +455,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) */ void lp_rast_end_tile( struct lp_rasterizer *rast ) { - RAST_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); if (rast->state.write_color) lp_rast_store_color(rast); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index e772a0158a6..81a9c1c1422 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -30,6 +30,7 @@ */ #include "util/u_math.h" +#include "lp_debug.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" @@ -185,7 +186,7 @@ lp_rast_triangle( struct lp_rasterizer *rast, assert(Elements(rast->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); - debug_printf("%s\n", __FUNCTION__); + LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); rast->nr_blocks = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6136d1b57e3..7f31df6ae50 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -37,12 +37,12 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "lp_debug.h" #include "lp_state.h" #include "lp_buffer.h" #include "lp_texture.h" #include "lp_setup_context.h" -#define SETUP_DEBUG debug_printf static void set_state( struct setup_context *, unsigned ); @@ -82,7 +82,7 @@ static void reset_context( struct setup_context *setup ) { unsigned i, j; - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); /* Reset derived state */ setup->constants.stored_size = 0; @@ -238,7 +238,7 @@ rasterize_bins( struct setup_context *setup, struct lp_rasterizer *rast = setup->rast; 
unsigned i, j; - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); lp_rast_begin( rast, setup->fb.cbuf, @@ -261,7 +261,7 @@ rasterize_bins( struct setup_context *setup, reset_context( setup ); - SETUP_DEBUG("%s done \n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } @@ -269,7 +269,7 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); if (setup->fb.cbuf) { if (setup->clear.flags & PIPE_CLEAR_COLOR) @@ -289,7 +289,7 @@ begin_binning( struct setup_context *setup ) bin_everywhere( setup, lp_rast_load_zstencil, lp_rast_arg_null() ); } - SETUP_DEBUG("%s done\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); } @@ -301,7 +301,7 @@ begin_binning( struct setup_context *setup ) static void execute_clears( struct setup_context *setup ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); rasterize_bins( setup, TRUE ); @@ -317,7 +317,7 @@ set_state( struct setup_context *setup, if (old_state == new_state) return; - SETUP_DEBUG("%s old %d new %d\n", __FUNCTION__, old_state, new_state); + LP_DBG(DEBUG_SETUP, "%s old %d new %d\n", __FUNCTION__, old_state, new_state); switch (new_state) { case SETUP_ACTIVE: @@ -347,7 +347,7 @@ void lp_setup_flush( struct setup_context *setup, unsigned flags ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); set_state( setup, SETUP_FLUSHED ); } @@ -358,7 +358,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, struct pipe_surface *color, struct pipe_surface *zstencil ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); set_state( setup, SETUP_FLUSHED ); @@ -401,7 +401,7 @@ lp_setup_clear( struct setup_context *setup, { unsigned i; - SETUP_DEBUG("%s state %d\n", __FUNCTION__, setup->state); + LP_DBG(DEBUG_SETUP, "%s 
state %d\n", __FUNCTION__, setup->state); if (flags & PIPE_CLEAR_COLOR) { @@ -451,7 +451,7 @@ lp_setup_set_triangle_state( struct setup_context *setup, unsigned cull_mode, boolean ccw_is_frontface) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; @@ -465,7 +465,7 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const struct lp_shader_input *input, unsigned nr ) { - SETUP_DEBUG("%s %p %u\n", __FUNCTION__, (void *) input, nr); + LP_DBG(DEBUG_SETUP, "%s %p %u\n", __FUNCTION__, (void *) input, nr); memcpy( setup->fs.input, input, nr * sizeof input[0] ); setup->fs.nr_inputs = nr; @@ -475,7 +475,7 @@ void lp_setup_set_fs( struct setup_context *setup, struct lp_fragment_shader *fs ) { - SETUP_DEBUG("%s %p\n", __FUNCTION__, (void *) fs); + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) fs); /* FIXME: reference count */ setup->fs.current.jit_function = fs ? fs->current->jit_function : NULL; @@ -486,7 +486,7 @@ void lp_setup_set_fs_constants(struct setup_context *setup, struct pipe_buffer *buffer) { - SETUP_DEBUG("%s %p\n", __FUNCTION__, (void *) buffer); + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffer); pipe_buffer_reference(&setup->constants.current, buffer); @@ -498,7 +498,7 @@ void lp_setup_set_alpha_ref_value( struct setup_context *setup, float alpha_ref_value ) { - SETUP_DEBUG("%s %f\n", __FUNCTION__, alpha_ref_value); + LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value); if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; @@ -510,7 +510,7 @@ void lp_setup_set_blend_color( struct setup_context *setup, const struct pipe_blend_color *blend_color ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); assert(blend_color); @@ -527,7 +527,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, struct 
pipe_texture *dummy; unsigned i; - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); assert(num <= PIPE_MAX_SAMPLERS); @@ -569,7 +569,7 @@ lp_setup_is_texture_referenced( struct setup_context *setup, static INLINE void lp_setup_update_shader_state( struct setup_context *setup ) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); assert(setup->fs.current.jit_function); @@ -685,7 +685,7 @@ lp_setup_tri(struct setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { - SETUP_DEBUG("%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); lp_setup_update_shader_state(setup); setup->triangle( setup, v0, v1, v2 ); -- cgit v1.2.3 From 9fca3e065b9ab5ef1389a76934bc24ed2b287a76 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 14:22:08 -0700 Subject: llvmpipe: simplify framebuffer state code --- src/gallium/drivers/llvmpipe/lp_setup.c | 29 +++++++++++++++---------- src/gallium/drivers/llvmpipe/lp_setup.h | 5 ++--- src/gallium/drivers/llvmpipe/lp_setup_context.h | 7 +----- src/gallium/drivers/llvmpipe/lp_state_surface.c | 4 +--- 4 files changed, 21 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 7f31df6ae50..38609ec88a2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -241,12 +241,12 @@ rasterize_bins( struct setup_context *setup, LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); lp_rast_begin( rast, - setup->fb.cbuf, - setup->fb.zsbuf, - setup->fb.cbuf != NULL, - setup->fb.zsbuf != NULL && write_depth, - setup->fb.width, - setup->fb.height ); + setup->fb->cbufs[0], + setup->fb->zsbuf, + setup->fb->cbufs[0] != NULL, + setup->fb->zsbuf != NULL && write_depth, + setup->fb->width, + setup->fb->height ); /* loop over tile bins, rasterize each */ for (i = 0; i < setup->tiles_x; i++) { @@ -271,7 +271,7 @@ begin_binning( struct setup_context 
*setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (setup->fb.cbuf) { + if (setup->fb->cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, @@ -280,7 +280,7 @@ begin_binning( struct setup_context *setup ) bin_everywhere( setup, lp_rast_load_color, lp_rast_arg_null() ); } - if (setup->fb.zsbuf) { + if (setup->fb->zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) bin_everywhere( setup, lp_rast_clear_zstencil, @@ -355,13 +355,13 @@ lp_setup_flush( struct setup_context *setup, void lp_setup_bind_framebuffer( struct setup_context *setup, - struct pipe_surface *color, - struct pipe_surface *zstencil ) + const struct pipe_framebuffer_state *fb ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); set_state( setup, SETUP_FLUSHED ); +#if 0 pipe_surface_reference( &setup->fb.cbuf, color ); pipe_surface_reference( &setup->fb.zsbuf, zstencil ); @@ -386,9 +386,14 @@ lp_setup_bind_framebuffer( struct setup_context *setup, setup->fb.height = MIN2(setup->fb.cbuf->height, setup->fb.zsbuf->height); } - setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; +#else + setup->fb = fb; + setup->tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; + setup->tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; +#endif + } @@ -411,7 +416,7 @@ lp_setup_clear( struct setup_context *setup, if (flags & PIPE_CLEAR_DEPTHSTENCIL) { setup->clear.zstencil.clear_zstencil = - util_pack_z_stencil(setup->fb.zsbuf->format, + util_pack_z_stencil(setup->fb->zsbuf->format, depth, stencil); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 1edd7410fc0..66a7f29f1e7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -53,7 +53,7 @@ struct pipe_surface; struct pipe_buffer; struct pipe_blend_color; struct pipe_screen; -struct setup_context; +struct pipe_framebuffer_state; 
struct lp_fragment_shader; struct lp_jit_context; @@ -90,8 +90,7 @@ lp_setup_flush( struct setup_context *setup, void lp_setup_bind_framebuffer( struct setup_context *setup, - struct pipe_surface *color, - struct pipe_surface *zstencil ); + const struct pipe_framebuffer_state *fb ); void lp_setup_set_triangle_state( struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 180b8f6e880..dc12eb78471 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -79,12 +79,7 @@ struct setup_context { boolean ccw_is_frontface; unsigned cullmode; - struct { - struct pipe_surface *cbuf; - struct pipe_surface *zsbuf; - unsigned width; - unsigned height; - } fb; + const struct pipe_framebuffer_state *fb; struct { unsigned flags; diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 909ca9f1170..3eff40e3f19 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -83,9 +83,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } if (dirty) { - lp_setup_bind_framebuffer( lp->setup, - fb->cbufs[0], - fb->zsbuf ); + lp_setup_bind_framebuffer( lp->setup, fb ); lp->dirty |= LP_NEW_FRAMEBUFFER; } -- cgit v1.2.3 From b533b56750aca8c7e8cb22af93a0fc2a0cfc0d97 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 14:47:40 -0700 Subject: llvmpipe: move lp_rasterize_bin() into lp_rast.c First step of moving bin rasterization/execution code out of lp_setup.c --- src/gallium/drivers/llvmpipe/lp_rast.c | 37 +++++++++++++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 18 ++++++---------- src/gallium/drivers/llvmpipe/lp_setup.c | 29 +++----------------------- 3 files changed, 42 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c 
b/src/gallium/drivers/llvmpipe/lp_rast.c index 5891a2a706d..a466aec379e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -33,6 +33,7 @@ #include "lp_rast_priv.h" #include "lp_tile_soa.h" #include "lp_bld_debug.h" +#include "lp_bin.h" struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) @@ -148,9 +149,9 @@ void lp_rast_end( struct lp_rasterizer *rast ) * \param x window X position of the tile, in pixels * \param y window Y position of the tile, in pixels */ -void lp_rast_start_tile( struct lp_rasterizer *rast, - unsigned x, - unsigned y ) +static void +lp_rast_start_tile( struct lp_rasterizer *rast, + unsigned x, unsigned y ) { LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); @@ -453,7 +454,8 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) /** * Write the rasterizer's tiles to the framebuffer. */ -void lp_rast_end_tile( struct lp_rasterizer *rast ) +static void +lp_rast_end_tile( struct lp_rasterizer *rast ) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -465,6 +467,33 @@ void lp_rast_end_tile( struct lp_rasterizer *rast ) } +/** + * Rasterize commands for a single bin. + * Must be called between lp_rast_begin() and lp_rast_end(). 
+ */ +void +lp_rasterize_bin( struct lp_rasterizer *rast, + const struct cmd_bin *bin, + int x, int y) +{ + const struct cmd_block_list *commands = &bin->commands; + struct cmd_block *block; + unsigned k; + + lp_rast_start_tile( rast, x, y ); + + /* simply execute each of the commands in the block list */ + for (block = commands->head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + block->cmd[k]( rast, block->arg[k] ); + } + } + + lp_rast_end_tile( rast ); +} + + + /* Shutdown: */ void lp_rast_destroy( struct lp_rasterizer *rast ) diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 21bbf104b15..3d2388b8948 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -47,6 +47,7 @@ * individual function calls like this. */ struct lp_rasterizer; +struct cmd_bin; struct pipe_screen; #define FIXED_ORDER 4 @@ -141,14 +142,13 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, unsigned width, unsigned height ); -void lp_rast_end( struct lp_rasterizer * ); +void +lp_rasterize_bin( struct lp_rasterizer *rast, + const struct cmd_bin *bin, + int x, int y); -/* Begining of each tile: - */ -void lp_rast_start_tile( struct lp_rasterizer *, - unsigned x, - unsigned y ); +void lp_rast_end( struct lp_rasterizer * ); union lp_rast_cmd_arg { @@ -224,10 +224,4 @@ void lp_rast_shade_tile( struct lp_rasterizer *, const union lp_rast_cmd_arg ); -/* End of tile: - */ - -void lp_rast_end_tile( struct lp_rasterizer *rast ); - - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 38609ec88a2..47d2ac8e118 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -207,29 +207,6 @@ bin_state_command( struct setup_context *setup, } -/** Rasterize commands for a single bin */ -static void -rasterize_bin( struct lp_rasterizer *rast, - const struct cmd_bin *bin, - int x, int y) -{ - const 
struct cmd_block_list *commands = &bin->commands; - struct cmd_block *block; - unsigned k; - - lp_rast_start_tile( rast, x, y ); - - /* simply execute each of the commands in the block list */ - for (block = commands->head; block; block = block->next) { - for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, block->arg[k] ); - } - } - - lp_rast_end_tile( rast ); -} - - /** Rasterize all tile's bins */ static void rasterize_bins( struct setup_context *setup, @@ -251,9 +228,9 @@ rasterize_bins( struct setup_context *setup, /* loop over tile bins, rasterize each */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - rasterize_bin( rast, &setup->tile[i][j], - i * TILE_SIZE, - j * TILE_SIZE ); + lp_rasterize_bin( rast, &setup->tile[i][j], + i * TILE_SIZE, + j * TILE_SIZE ); } } -- cgit v1.2.3 From 01b1900084152dbacd4025a31ced25f75666ce59 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 15:31:09 -0700 Subject: llvmpipe: reorganization of binning data structures and functions New lp_bins struct contains all bin information. More move bin-related code into lp_bin.[ch] Use new/updated bin-access functions to hide implementation details. The result is more/cleaner separation between the setup and rast components. This will make double-buffering of the bins easier, etc. 
--- src/gallium/drivers/llvmpipe/lp_bin.c | 78 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bin.h | 61 +++++++++++++--- src/gallium/drivers/llvmpipe/lp_rast.c | 51 ++++++++++++-- src/gallium/drivers/llvmpipe/lp_rast.h | 21 ++---- src/gallium/drivers/llvmpipe/lp_setup.c | 93 ++++--------------------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 17 +---- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 26 +++---- 7 files changed, 209 insertions(+), 138 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c index f43cdcbf3de..1f05416b3ee 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -29,6 +29,84 @@ #include "lp_bin.h" +void +lp_init_bins(struct lp_bins *bins) +{ + unsigned i, j; + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + } + + bins->data.head = + bins->data.tail = CALLOC_STRUCT(data_block); +} + + +void +lp_reset_bins(struct lp_bins *bins, unsigned tiles_x, unsigned tiles_y) +{ + unsigned i, j; + + /* Free all but last binner command lists: + */ + for (i = 0; i < tiles_x; i++) { + for (j = 0; j < tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + struct cmd_block_list *list = &bin->commands; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->count = 0; + } + } + + /* Free all but last binned data block: + */ + { + struct data_block_list *list = &bins->data; + struct data_block *block, *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->used = 0; + } +} + + +void 
+lp_free_bin_data(struct lp_bins *bins) +{ + unsigned i, j; + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + /* lp_reset_bins() should have been already called */ + assert(bin->commands.head == bin->commands.tail); + FREE(bin->commands.head); + bin->commands.head = NULL; + bin->commands.tail = NULL; + } + + FREE(bins->data.head); + bins->data.head = NULL; +} + + void lp_bin_new_cmd_block( struct cmd_block_list *list ) { diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index fa25d786311..4d12b932745 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -35,9 +35,19 @@ #ifndef LP_BIN_H #define LP_BIN_H +#include "lp_tile_soa.h" #include "lp_rast.h" +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. + */ +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 +#define TILES_X (MAXWIDTH / TILE_SIZE) +#define TILES_Y (MAXHEIGHT / TILE_SIZE) + + #define CMD_BLOCK_MAX 128 #define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) @@ -84,19 +94,40 @@ struct data_block_list { }; +/** + * All bins and bin data are contained here. + * Per-bin data goes into the 'tile' bins. + * Shared bin data goes into the 'data' buffer. 
+ * When there are multiple threads, will want to double-buffer the + * bin arrays: + */ +struct lp_bins { + struct cmd_bin tile[TILES_X][TILES_Y]; + struct data_block_list data; +}; + + + +void lp_init_bins(struct lp_bins *bins); -extern void lp_bin_new_data_block( struct data_block_list *list ); +void lp_reset_bins(struct lp_bins *bins, unsigned tiles_x, unsigned tiles_y); -extern void lp_bin_new_cmd_block( struct cmd_block_list *list ); +void lp_free_bin_data(struct lp_bins *bins); + +void lp_bin_new_data_block( struct data_block_list *list ); + +void lp_bin_new_cmd_block( struct cmd_block_list *list ); /** - * Allocate space for a command/data in the given block list. + * Allocate space for a command/data in the bin's data buffer. * Grow the block list if needed. */ static INLINE void * -lp_bin_alloc( struct data_block_list *list, unsigned size) +lp_bin_alloc( struct lp_bins *bins, unsigned size) { + struct data_block_list *list = &bins->data; + if (list->tail->used + size > DATA_BLOCK_SIZE) { lp_bin_new_data_block( list ); } @@ -114,9 +145,11 @@ lp_bin_alloc( struct data_block_list *list, unsigned size) * As above, but with specific alignment. */ static INLINE void * -lp_bin_alloc_aligned( struct data_block_list *list, unsigned size, +lp_bin_alloc_aligned( struct lp_bins *bins, unsigned size, unsigned alignment ) { + struct data_block_list *list = &bins->data; + if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { lp_bin_new_data_block( list ); } @@ -134,20 +167,32 @@ lp_bin_alloc_aligned( struct data_block_list *list, unsigned size, /* Put back data if we decide not to use it, eg. culled triangles. */ static INLINE void -lp_bin_putback_data( struct data_block_list *list, unsigned size) +lp_bin_putback_data( struct lp_bins *bins, unsigned size) { + struct data_block_list *list = &bins->data; assert(list->tail->used >= size); list->tail->used -= size; } -/* Add a command to a given bin. +/** Return pointer to a particular tile's bin. 
*/ +static INLINE struct cmd_bin * +lp_get_bin(struct lp_bins *bins, unsigned x, unsigned y) +{ + return &bins->tile[x][y]; +} + + + +/* Add a command to bin[x][y]. */ static INLINE void -lp_bin_command( struct cmd_bin *bin, +lp_bin_command( struct lp_bins *bins, + unsigned x, unsigned y, lp_rast_cmd cmd, union lp_rast_cmd_arg arg ) { + struct cmd_bin *bin = lp_get_bin(bins, x, y); struct cmd_block_list *list = &bin->commands; if (list->tail->count == CMD_BLOCK_MAX) { diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a466aec379e..87e3bfcd3f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -56,7 +56,8 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) * Begin the rasterization phase. * Map the framebuffer surfaces. Initialize the 'rast' state. */ -boolean lp_rast_begin( struct lp_rasterizer *rast, +static boolean +lp_rast_begin( struct lp_rasterizer *rast, struct pipe_surface *cbuf, struct pipe_surface *zsbuf, boolean write_color, @@ -121,7 +122,8 @@ boolean lp_rast_begin( struct lp_rasterizer *rast, * Finish the rasterization phase. * Unmap framebuffer surfaces. */ -void lp_rast_end( struct lp_rasterizer *rast ) +static void +lp_rast_end( struct lp_rasterizer *rast ) { struct pipe_screen *screen = rast->screen; @@ -469,12 +471,13 @@ lp_rast_end_tile( struct lp_rasterizer *rast ) /** * Rasterize commands for a single bin. + * \param x, y position of the bin's tile in the framebuffer * Must be called between lp_rast_begin() and lp_rast_end(). */ -void -lp_rasterize_bin( struct lp_rasterizer *rast, - const struct cmd_bin *bin, - int x, int y) +static void +rasterize_bin( struct lp_rasterizer *rast, + const struct cmd_bin *bin, + int x, int y) { const struct cmd_block_list *commands = &bin->commands; struct cmd_block *block; @@ -493,6 +496,42 @@ lp_rasterize_bin( struct lp_rasterizer *rast, } +/** + * Rasterize/execute all bins. 
+ */ +void +lp_rasterize_bins( struct lp_rasterizer *rast, + struct lp_bins *bins, + unsigned tiles_x, unsigned tiles_y, + const struct pipe_framebuffer_state *fb, + bool write_depth ) +{ + unsigned i, j; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + lp_rast_begin( rast, + fb->cbufs[0], + fb->zsbuf, + fb->cbufs[0] != NULL, + fb->zsbuf != NULL && write_depth, + fb->width, + fb->height ); + + /* loop over tile bins, rasterize each */ + for (i = 0; i < tiles_x; i++) { + for (j = 0; j < tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); + } + } + + lp_rast_end( rast ); + + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); +} + + /* Shutdown: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 3d2388b8948..e623eafc9ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -47,6 +47,7 @@ * individual function calls like this. 
*/ struct lp_rasterizer; +struct lp_bins; struct cmd_bin; struct pipe_screen; @@ -133,22 +134,12 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ); void lp_rast_destroy( struct lp_rasterizer * ); +void lp_rasterize_bins( struct lp_rasterizer *rast, + struct lp_bins *bins, + unsigned tiles_x, unsigned tiles_y, + const struct pipe_framebuffer_state *fb, + bool write_depth ); -boolean lp_rast_begin( struct lp_rasterizer *rast, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - boolean write_color, - boolean write_zstencil, - unsigned width, - unsigned height ); - -void -lp_rasterize_bin( struct lp_rasterizer *rast, - const struct cmd_bin *bin, - int x, int y); - - -void lp_rast_end( struct lp_rasterizer * ); union lp_rast_cmd_arg { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 47d2ac8e118..efaf5acfe84 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -80,8 +80,6 @@ first_point( struct setup_context *setup, static void reset_context( struct setup_context *setup ) { - unsigned i, j; - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); /* Reset derived state */ @@ -90,40 +88,7 @@ static void reset_context( struct setup_context *setup ) setup->fs.stored = NULL; setup->dirty = ~0; - /* Free all but last binner command lists: - */ - for (i = 0; i < setup->tiles_x; i++) { - for (j = 0; j < setup->tiles_y; j++) { - struct cmd_block_list *list = &setup->tile[i][j].commands; - struct cmd_block *block; - struct cmd_block *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->count = 0; - } - } - - /* Free all but last binned data block: - */ - { - struct data_block_list *list = &setup->data; - struct data_block *block, *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - 
FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->used = 0; - } + lp_reset_bins(&setup->bins, setup->tiles_x, setup->tiles_y); /* Reset some state: */ @@ -177,7 +142,7 @@ static void bin_everywhere( struct setup_context *setup, unsigned i, j; for (i = 0; i < setup->tiles_x; i++) for (j = 0; j < setup->tiles_y; j++) - lp_bin_command( &setup->tile[i][j], cmd, arg ); + lp_bin_command( &setup->bins, i, j, cmd, arg ); } @@ -194,13 +159,13 @@ bin_state_command( struct setup_context *setup, unsigned i, j; for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { - struct cmd_bin *bin = &setup->tile[i][j]; + struct cmd_bin *bin = &setup->bins.tile[i][j]; lp_rast_cmd last_cmd = lp_get_last_command(bin); if (last_cmd == cmd) { lp_replace_last_command_arg(bin, arg); } else { - lp_bin_command( bin, cmd, arg ); + lp_bin_command( &setup->bins, i, j, cmd, arg ); } } } @@ -212,29 +177,10 @@ static void rasterize_bins( struct setup_context *setup, boolean write_depth ) { - struct lp_rasterizer *rast = setup->rast; - unsigned i, j; - - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - - lp_rast_begin( rast, - setup->fb->cbufs[0], - setup->fb->zsbuf, - setup->fb->cbufs[0] != NULL, - setup->fb->zsbuf != NULL && write_depth, - setup->fb->width, - setup->fb->height ); - - /* loop over tile bins, rasterize each */ - for (i = 0; i < setup->tiles_x; i++) { - for (j = 0; j < setup->tiles_y; j++) { - lp_rasterize_bin( rast, &setup->tile[i][j], - i * TILE_SIZE, - j * TILE_SIZE ); - } - } - - lp_rast_end( rast ); + lp_rasterize_bins(setup->rast, + &setup->bins, setup->tiles_x, setup->tiles_y, + setup->fb, + write_depth); reset_context( setup ); @@ -559,7 +505,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) uint8_t *stored; unsigned i, j; - stored = lp_bin_alloc_aligned(&setup->data, 4 * 16, 16); + stored = lp_bin_alloc_aligned(&setup->bins, 4 * 16, 16); /* smear each blend color component across 16 ubyte elements */ 
for (i = 0; i < 4; ++i) { @@ -591,7 +537,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) current_size) != 0) { void *stored; - stored = lp_bin_alloc(&setup->data, current_size); + stored = lp_bin_alloc(&setup->bins, current_size); if(stored) { memcpy(stored, current_data, @@ -621,7 +567,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) * and append it to the bin's setup data buffer. */ struct lp_rast_state *stored = - (struct lp_rast_state *) lp_bin_alloc(&setup->data, sizeof *stored); + (struct lp_rast_state *) lp_bin_alloc(&setup->bins, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, @@ -677,17 +623,11 @@ lp_setup_tri(struct setup_context *setup, void lp_setup_destroy( struct setup_context *setup ) { - unsigned i, j; - reset_context( setup ); pipe_buffer_reference(&setup->constants.current, NULL); - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) - FREE(setup->tile[i][j].commands.head); - - FREE(setup->data.head); + lp_free_bin_data(&setup->bins); lp_rast_destroy( setup->rast ); FREE( setup ); @@ -702,19 +642,12 @@ struct setup_context * lp_setup_create( struct pipe_screen *screen ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); - unsigned i, j; setup->rast = lp_rast_create( screen ); if (!setup->rast) goto fail; - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) - setup->tile[i][j].commands.head = - setup->tile[i][j].commands.tail = CALLOC_STRUCT(cmd_block); - - setup->data.head = - setup->data.tail = CALLOC_STRUCT(data_block); + lp_init_bins(&setup->bins); setup->triangle = first_triangle; setup->line = first_line; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index dc12eb78471..8478bb9014c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -40,14 +40,6 @@ #include "lp_tile_soa.h" /* for TILE_SIZE */ #include "lp_bin.h" -/* We're limited to 
2K by 2K for 32bit fixed point rasterization. - * Will need a 64-bit version for larger framebuffers. - */ -#define MAXHEIGHT 2048 -#define MAXWIDTH 2048 -#define TILES_X (MAXWIDTH / TILE_SIZE) -#define TILES_Y (MAXHEIGHT / TILE_SIZE) - #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 @@ -63,14 +55,7 @@ struct setup_context { struct lp_rasterizer *rast; - /** - * Per-bin data goes into the 'tile' bins. - * Shared bin data goes into the 'data' buffer. - * When there are multiple threads, will want to double-buffer the - * bin arrays: - */ - struct cmd_bin tile[TILES_X][TILES_Y]; - struct data_block_list data; + struct lp_bins bins; /* size of framebuffer, in tiles */ unsigned tiles_x; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 5e53b4050e8..b8f79849e8b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -185,9 +185,9 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned bytes; bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); - tri->inputs.dadx = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); - tri->inputs.dady = lp_bin_alloc_aligned( &setup->data, bytes, 16 ); + tri->inputs.a0 = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); + tri->inputs.dadx = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); + tri->inputs.dady = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); } /* The internal position input is in slot zero: @@ -263,7 +263,7 @@ do_triangle_ccw(struct setup_context *setup, const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - struct lp_rast_triangle *tri = lp_bin_alloc( &setup->data, sizeof *tri ); + struct lp_rast_triangle *tri = lp_bin_alloc( &setup->bins, sizeof *tri ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -283,7 +283,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: 
subject to overflow?? */ if (area <= 0) { - lp_bin_putback_data( &setup->data, sizeof *tri ); + lp_bin_putback_data( &setup->bins, sizeof *tri ); return; } @@ -295,7 +295,7 @@ do_triangle_ccw(struct setup_context *setup, if (tri->miny == tri->maxy || tri->minx == tri->maxx) { - lp_bin_putback_data( &setup->data, sizeof *tri ); + lp_bin_putback_data( &setup->bins, sizeof *tri ); return; } @@ -405,7 +405,7 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - lp_bin_command( &setup->tile[minx][miny], lp_rast_triangle, + lp_bin_command( &setup->bins, minx, miny, lp_rast_triangle, lp_rast_arg_triangle(tri) ); } else @@ -464,17 +464,17 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ - lp_bin_command( &setup->tile[x][y], - lp_rast_shade_tile, - lp_rast_arg_inputs(&tri->inputs) ); + lp_bin_command( &setup->bins, x, y, + lp_rast_shade_tile, + lp_rast_arg_inputs(&tri->inputs) ); } else { in = 1; /* shade partial tile */ - lp_bin_command( &setup->tile[x][y], - lp_rast_triangle, - lp_rast_arg_triangle(tri) ); + lp_bin_command( &setup->bins, x, y, + lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } /* Iterate cx values across the region: -- cgit v1.2.3 From 2c8d5c66ce2ddc0b7182e4844690736fc4c47212 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 15:46:37 -0700 Subject: llvmpipe: remove dead code left over from a previous commit --- src/gallium/drivers/llvmpipe/lp_setup.c | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index efaf5acfe84..e561e8e9b6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -284,39 +284,9 @@ lp_setup_bind_framebuffer( struct setup_context *setup, set_state( setup, SETUP_FLUSHED ); -#if 0 - pipe_surface_reference( &setup->fb.cbuf, color ); - 
pipe_surface_reference( &setup->fb.zsbuf, zstencil ); - - if (!setup->fb.cbuf && !setup->fb.zsbuf) { - setup->fb.width = 0; - setup->fb.height = 0; - } - else if (!setup->fb.zsbuf) { - setup->fb.width = setup->fb.cbuf->width; - setup->fb.height = setup->fb.cbuf->height; - } - else if (!setup->fb.cbuf) { - setup->fb.width = setup->fb.zsbuf->width; - setup->fb.height = setup->fb.zsbuf->height; - } - else { - /* XXX: not sure what we're really supposed to do for - * mis-matched color & depth buffer sizes. - */ - setup->fb.width = MIN2(setup->fb.cbuf->width, - setup->fb.zsbuf->width); - setup->fb.height = MIN2(setup->fb.cbuf->height, - setup->fb.zsbuf->height); - } - setup->tiles_x = align(setup->fb.width, TILE_SIZE) / TILE_SIZE; - setup->tiles_y = align(setup->fb.height, TILE_SIZE) / TILE_SIZE; -#else setup->fb = fb; setup->tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; setup->tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; -#endif - } -- cgit v1.2.3 From 8a23105fa016ec4368f407ca64e7763f110da4e5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Dec 2009 15:59:25 -0700 Subject: llvmpipe: still more bin code reorganization Move tiles_x,y fields from setup state into bin state. Move more bin-adding commands into lp_bin.[ch]. 
--- src/gallium/drivers/llvmpipe/lp_bin.c | 70 +++++++++++++- src/gallium/drivers/llvmpipe/lp_bin.h | 32 ++++++- src/gallium/drivers/llvmpipe/lp_rast.c | 5 +- src/gallium/drivers/llvmpipe/lp_rast.h | 1 - src/gallium/drivers/llvmpipe/lp_setup.c | 120 ++++++------------------ src/gallium/drivers/llvmpipe/lp_setup_context.h | 4 - 6 files changed, 130 insertions(+), 102 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c index 1f05416b3ee..160a8d865ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -45,14 +45,14 @@ lp_init_bins(struct lp_bins *bins) void -lp_reset_bins(struct lp_bins *bins, unsigned tiles_x, unsigned tiles_y) +lp_reset_bins(struct lp_bins *bins ) { unsigned i, j; /* Free all but last binner command lists: */ - for (i = 0; i < tiles_x; i++) { - for (j = 0; j < tiles_y; j++) { + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { struct cmd_bin *bin = lp_get_bin(bins, i, j); struct cmd_block_list *list = &bin->commands; struct cmd_block *block; @@ -107,6 +107,14 @@ lp_free_bin_data(struct lp_bins *bins) } +void +lp_bin_set_num_bins( struct lp_bins *bins, + unsigned tiles_x, unsigned tiles_y ) +{ + bins->tiles_x = tiles_x; + bins->tiles_y = tiles_y; +} + void lp_bin_new_cmd_block( struct cmd_block_list *list ) { @@ -127,3 +135,59 @@ lp_bin_new_data_block( struct data_block_list *list ) block->next = NULL; block->used = 0; } + + +/** + * Return last command in the bin + */ +static lp_rast_cmd +lp_get_last_command( const struct cmd_bin *bin ) +{ + const struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + if (i > 0) + return tail->cmd[i - 1]; + else + return NULL; +} + + +/** + * Replace the arg of the last command in the bin. 
+ */ +static void +lp_replace_last_command_arg( struct cmd_bin *bin, + const union lp_rast_cmd_arg arg ) +{ + struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + assert(i > 0); + tail->arg[i - 1] = arg; +} + + + +/** + * Put a state-change command into all bins. + * If we find that the last command in a bin was also a state-change + * command, we can simply replace that one with the new one. + */ +void +lp_bin_state_command( struct lp_bins *bins, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + lp_rast_cmd last_cmd = lp_get_last_command(bin); + if (last_cmd == cmd) { + lp_replace_last_command_arg(bin, arg); + } + else { + lp_bin_command( bins, i, j, cmd, arg ); + } + } + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index 4d12b932745..fcbb975ad64 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -104,16 +104,26 @@ struct data_block_list { struct lp_bins { struct cmd_bin tile[TILES_X][TILES_Y]; struct data_block_list data; + + /** + * Number of active tiles in each dimension. + * This basically the framebuffer size divided by tile size + */ + unsigned tiles_x, tiles_y; }; void lp_init_bins(struct lp_bins *bins); -void lp_reset_bins(struct lp_bins *bins, unsigned tiles_x, unsigned tiles_y); +void lp_reset_bins(struct lp_bins *bins ); void lp_free_bin_data(struct lp_bins *bins); +void +lp_bin_set_num_bins( struct lp_bins *bins, + unsigned tiles_x, unsigned tiles_y ); + void lp_bin_new_data_block( struct data_block_list *list ); void lp_bin_new_cmd_block( struct cmd_block_list *list ); @@ -209,4 +219,24 @@ lp_bin_command( struct lp_bins *bins, } +/* Add a command to all active bins. 
+ */ +static INLINE void +lp_bin_everywhere( struct lp_bins *bins, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < bins->tiles_x; i++) + for (j = 0; j < bins->tiles_y; j++) + lp_bin_command( bins, i, j, cmd, arg ); +} + + +void +lp_bin_state_command( struct lp_bins *bins, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ); + + #endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 87e3bfcd3f5..642f1b90795 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -502,7 +502,6 @@ rasterize_bin( struct lp_rasterizer *rast, void lp_rasterize_bins( struct lp_rasterizer *rast, struct lp_bins *bins, - unsigned tiles_x, unsigned tiles_y, const struct pipe_framebuffer_state *fb, bool write_depth ) { @@ -519,8 +518,8 @@ lp_rasterize_bins( struct lp_rasterizer *rast, fb->height ); /* loop over tile bins, rasterize each */ - for (i = 0; i < tiles_x; i++) { - for (j = 0; j < tiles_y; j++) { + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { struct cmd_bin *bin = lp_get_bin(bins, i, j); rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e623eafc9ad..e77c77b7762 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -136,7 +136,6 @@ void lp_rast_destroy( struct lp_rasterizer * ); void lp_rasterize_bins( struct lp_rasterizer *rast, struct lp_bins *bins, - unsigned tiles_x, unsigned tiles_y, const struct pipe_framebuffer_state *fb, bool write_depth ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index e561e8e9b6a..4935d5b5407 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -88,7 +88,7 @@ static void reset_context( struct setup_context *setup ) 
setup->fs.stored = NULL; setup->dirty = ~0; - lp_reset_bins(&setup->bins, setup->tiles_x, setup->tiles_y); + lp_reset_bins( &setup->bins ); /* Reset some state: */ @@ -103,82 +103,13 @@ static void reset_context( struct setup_context *setup ) } -/** - * Return last command in the bin - */ -static lp_rast_cmd -lp_get_last_command( const struct cmd_bin *bin ) -{ - const struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - if (i > 0) - return tail->cmd[i - 1]; - else - return NULL; -} - - -/** - * Replace the arg of the last command in the bin. - */ -static void -lp_replace_last_command_arg( struct cmd_bin *bin, - const union lp_rast_cmd_arg arg ) -{ - struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - assert(i > 0); - tail->arg[i - 1] = arg; -} - - - -/* Add a command to all active bins. - */ -static void bin_everywhere( struct setup_context *setup, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < setup->tiles_x; i++) - for (j = 0; j < setup->tiles_y; j++) - lp_bin_command( &setup->bins, i, j, cmd, arg ); -} - - -/** - * Put a state-change command into all bins. - * If we find that the last command in a bin was also a state-change - * command, we can simply replace that one with the new one. 
- */ -static void -bin_state_command( struct setup_context *setup, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < setup->tiles_x; i++) { - for (j = 0; j < setup->tiles_y; j++) { - struct cmd_bin *bin = &setup->bins.tile[i][j]; - lp_rast_cmd last_cmd = lp_get_last_command(bin); - if (last_cmd == cmd) { - lp_replace_last_command_arg(bin, arg); - } - else { - lp_bin_command( &setup->bins, i, j, cmd, arg ); - } - } - } -} - - /** Rasterize all tile's bins */ static void rasterize_bins( struct setup_context *setup, boolean write_depth ) { lp_rasterize_bins(setup->rast, - &setup->bins, setup->tiles_x, setup->tiles_y, + &setup->bins, setup->fb, write_depth); @@ -196,20 +127,24 @@ begin_binning( struct setup_context *setup ) if (setup->fb->cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) - bin_everywhere( setup, - lp_rast_clear_color, - setup->clear.color ); + lp_bin_everywhere( &setup->bins, + lp_rast_clear_color, + setup->clear.color ); else - bin_everywhere( setup, lp_rast_load_color, lp_rast_arg_null() ); + lp_bin_everywhere( &setup->bins, + lp_rast_load_color, + lp_rast_arg_null() ); } if (setup->fb->zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - bin_everywhere( setup, - lp_rast_clear_zstencil, - setup->clear.zstencil ); + lp_bin_everywhere( &setup->bins, + lp_rast_clear_zstencil, + setup->clear.zstencil ); else - bin_everywhere( setup, lp_rast_load_zstencil, lp_rast_arg_null() ); + lp_bin_everywhere( &setup->bins, + lp_rast_load_zstencil, + lp_rast_arg_null() ); } LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); @@ -280,13 +215,18 @@ void lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { + unsigned tiles_x, tiles_y; + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); set_state( setup, SETUP_FLUSHED ); setup->fb = fb; - setup->tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; - setup->tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; + + tiles_x 
= align(setup->fb->width, TILE_SIZE) / TILE_SIZE; + tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; + + lp_bin_set_num_bins(&setup->bins, tiles_x, tiles_y); } @@ -321,14 +261,14 @@ lp_setup_clear( struct setup_context *setup, * don't see that as being a common usage. */ if (flags & PIPE_CLEAR_COLOR) - bin_everywhere( setup, - lp_rast_clear_color, - setup->clear.color ); + lp_bin_everywhere( &setup->bins, + lp_rast_clear_color, + setup->clear.color ); if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - bin_everywhere( setup, - lp_rast_clear_zstencil, - setup->clear.zstencil ); + lp_bin_everywhere( &setup->bins, + lp_rast_clear_zstencil, + setup->clear.zstencil ); } else { /* Put ourselves into the 'pre-clear' state, specifically to try @@ -545,9 +485,9 @@ lp_setup_update_shader_state( struct setup_context *setup ) setup->fs.stored = stored; /* put the state-set command into all bins */ - bin_state_command( setup, - lp_rast_set_state, - lp_rast_arg_state(setup->fs.stored) ); + lp_bin_state_command( &setup->bins, + lp_rast_set_state, + lp_rast_arg_state(setup->fs.stored) ); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 8478bb9014c..9b47b595c62 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -57,10 +57,6 @@ struct setup_context { struct lp_bins bins; - /* size of framebuffer, in tiles */ - unsigned tiles_x; - unsigned tiles_y; - boolean ccw_is_frontface; unsigned cullmode; -- cgit v1.2.3 From 270f15486072b0a2fbea2a21b7a4a9d4c76d4bfb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 18:04:31 -0700 Subject: llvmpipe: introduce mutex and bin iteration functions --- src/gallium/drivers/llvmpipe/lp_bin.c | 68 +++++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bin.h | 11 ++++++ 2 files changed, 79 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c 
b/src/gallium/drivers/llvmpipe/lp_bin.c index 160a8d865ba..3e294e57994 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -41,9 +41,14 @@ lp_init_bins(struct lp_bins *bins) bins->data.head = bins->data.tail = CALLOC_STRUCT(data_block); + + pipe_mutex_init(bins->mutex); } +/** + * Set bins to empty state. + */ void lp_reset_bins(struct lp_bins *bins ) { @@ -87,6 +92,9 @@ lp_reset_bins(struct lp_bins *bins ) } +/** + * Free all data associated with the given bin, but don't free(bins). + */ void lp_free_bin_data(struct lp_bins *bins) { @@ -104,6 +112,8 @@ lp_free_bin_data(struct lp_bins *bins) FREE(bins->data.head); bins->data.head = NULL; + + pipe_mutex_destroy(bins->mutex); } @@ -191,3 +201,61 @@ lp_bin_state_command( struct lp_bins *bins, } } } + + +/** advance curr_x,y to the next bin */ +static boolean +next_bin(struct lp_bins *bins) +{ + bins->curr_x++; + if (bins->curr_x >= bins->tiles_x) { + bins->curr_x = 0; + bins->curr_y++; + } + if (bins->curr_y >= bins->tiles_y) { + /* no more bins */ + return FALSE; + } + return TRUE; +} + + +void +lp_bin_iter_begin( struct lp_bins *bins ) +{ + bins->curr_x = bins->curr_y = -1; +} + + +/** + * Return point to next bin to be rendered. + * The lp_bins::curr_x and ::curr_y fields will be advanced. + * Multiple rendering threads will call this function to get a chunk + * of work (a bin) to work on. 
+ */ +struct cmd_bin * +lp_bin_iter_next( struct lp_bins *bins, int *bin_x, int *bin_y ) +{ + struct cmd_bin *bin = NULL; + + pipe_mutex_lock(bins->mutex); + + if (bins->curr_x < 0) { + /* first bin */ + bins->curr_x = 0; + bins->curr_y = 0; + } + else if (!next_bin(bins)) { + /* no more bins left */ + goto end; + } + + bin = lp_get_bin(bins, bins->curr_x, bins->curr_y); + *bin_x = bins->curr_x; + *bin_y = bins->curr_y; + +end: + /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/ + pipe_mutex_unlock(bins->mutex); + return bin; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index fcbb975ad64..24e599ea66e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -35,6 +35,7 @@ #ifndef LP_BIN_H #define LP_BIN_H +#include "pipe/p_thread.h" #include "lp_tile_soa.h" #include "lp_rast.h" @@ -110,6 +111,9 @@ struct lp_bins { * This basically the framebuffer size divided by tile size */ unsigned tiles_x, tiles_y; + + int curr_x, curr_y; /**< for iterating over bins */ + pipe_mutex mutex; }; @@ -239,4 +243,11 @@ lp_bin_state_command( struct lp_bins *bins, const union lp_rast_cmd_arg arg ); +void +lp_bin_iter_begin( struct lp_bins *bins ); + +struct cmd_bin * +lp_bin_iter_next( struct lp_bins *bins, int *bin_x, int *bin_y ); + + #endif /* LP_BIN_H */ -- cgit v1.2.3 From cdaea049c95031338040b31ff31944c8a001a1dd Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 15:31:50 -0700 Subject: llvmpipe: use bin iteration functions when rasterizing bins --- src/gallium/drivers/llvmpipe/lp_rast.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 642f1b90795..a6192e589dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -505,8 +505,6 @@ lp_rasterize_bins( struct lp_rasterizer 
*rast, const struct pipe_framebuffer_state *fb, bool write_depth ) { - unsigned i, j; - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); lp_rast_begin( rast, @@ -518,12 +516,28 @@ lp_rasterize_bins( struct lp_rasterizer *rast, fb->height ); /* loop over tile bins, rasterize each */ - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); +#if 0 + { + unsigned i, j; + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); + } } } +#else + { + struct cmd_bin *bin; + int x, y; + + lp_bin_iter_begin( bins ); + + while ((bin = lp_bin_iter_next(bins, &x, &y))) { + rasterize_bin( rast, bin, x * TILE_SIZE, y * TILE_SIZE); + } + } +#endif lp_rast_end( rast ); -- cgit v1.2.3 From 3a06c113c76355fc9622adfe7565c18d9787e9a8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 17:02:17 -0700 Subject: llvmpipe: repartition lp_rasterizer state for threading Some of the state is per-thread. Put that state in new lp_rasterizer_task struct. 
--- src/gallium/drivers/llvmpipe/lp_bin.h | 4 +- src/gallium/drivers/llvmpipe/lp_rast.c | 89 +++++++++++++++++++---------- src/gallium/drivers/llvmpipe/lp_rast.h | 7 +++ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 43 +++++++++----- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 62 ++++++++++---------- 5 files changed, 133 insertions(+), 72 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index 24e599ea66e..b07ff64e623 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -56,7 +56,9 @@ /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); struct cmd_block { lp_rast_cmd cmd[CMD_BLOCK_MAX]; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a6192e589dc..37cc28e938e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -39,14 +39,18 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) { struct lp_rasterizer *rast; + unsigned i; rast = CALLOC_STRUCT(lp_rasterizer); if(!rast) return NULL; rast->screen = screen; - rast->tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); - rast->tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + + for (i = 0; i < Elements(rast->tasks); i++) { + rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + } return rast; } @@ -153,12 +157,13 @@ lp_rast_end( struct lp_rasterizer *rast ) */ static void lp_rast_start_tile( struct lp_rasterizer *rast, + unsigned thread_index, unsigned x, unsigned y ) { LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); - rast->x = x; - rast->y = y; + rast->tasks[thread_index].x = x; + 
rast->tasks[thread_index].y = y; } @@ -167,9 +172,11 @@ lp_rast_start_tile( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_clear_color( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { const uint8_t *clear_color = arg.clear_color; + uint8_t *color_tile = rast->tasks[thread_index].tile.color; LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, clear_color[0], @@ -180,14 +187,14 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { - memset(rast->tile.color, clear_color[0], TILE_SIZE * TILE_SIZE * 4); + memset(color_tile, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } else { unsigned x, y, chan; for (y = 0; y < TILE_SIZE; y++) for (x = 0; x < TILE_SIZE; x++) for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(rast->tile.color, x, y, chan) = clear_color[chan]; + TILE_PIXEL(color_tile, x, y, chan) = clear_color[chan]; } } @@ -197,15 +204,17 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg) { unsigned i, j; + uint32_t *depth_tile = rast->tasks[thread_index].tile.depth; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) - rast->tile.depth[i*TILE_SIZE + j] = arg.clear_zstencil; + depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil; } @@ -214,6 +223,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, * This is a bin command called during bin processing. 
*/ void lp_rast_load_color( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -227,6 +237,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_load_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -236,6 +247,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_set_state( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { const struct lp_rast_state *state = arg.set_state; @@ -243,7 +255,7 @@ void lp_rast_set_state( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); /* just set the current state pointer for this rasterizer */ - rast->current_state = state; + rast->tasks[thread_index].current_state = state; } @@ -257,9 +269,12 @@ void lp_rast_set_state( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_shade_tile( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + const unsigned tile_x = rast->tasks[thread_index].x; + const unsigned tile_y = rast->tasks[thread_index].y; const unsigned mask = ~0; unsigned x, y; @@ -269,7 +284,12 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 4) for (x = 0; x < TILE_SIZE; x += 4) - lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask); + lp_rast_shade_quads( rast, + thread_index, + inputs, + tile_x + x, + tile_y + y, + mask); } @@ -278,13 +298,14 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, * This is a bin command called during bin processing. 
*/ void lp_rast_shade_quads( struct lp_rasterizer *rast, + unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, unsigned mask) { #if 1 - const struct lp_rast_state *state = rast->current_state; - struct lp_rast_tile *tile = &rast->tile; + const struct lp_rast_state *state = rast->tasks[thread_index].current_state; + struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; void *color; void *depth; uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; @@ -388,10 +409,11 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /** * Write the rasterizer's color tile to the framebuffer. */ -static void lp_rast_store_color( struct lp_rasterizer *rast ) +static void lp_rast_store_color( struct lp_rasterizer *rast, + unsigned thread_index) { - const unsigned x = rast->x; - const unsigned y = rast->y; + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -404,7 +426,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); lp_tile_write_4ub(rast->cbuf_transfer->format, - rast->tile.color, + rast->tasks[thread_index].tile.color, rast->cbuf_map, rast->cbuf_transfer->stride, x, y, @@ -430,10 +452,11 @@ lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, /** * Write the rasterizer's z/stencil tile to the framebuffer. 
*/ -static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +static void lp_rast_store_zstencil( struct lp_rasterizer *rast, + unsigned thread_index ) { - const unsigned x = rast->x; - const unsigned y = rast->y; + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -446,7 +469,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM); - lp_tile_write_z32(rast->tile.depth, + lp_tile_write_z32(rast->tasks[thread_index].tile.depth, rast->zsbuf_map, rast->zsbuf_transfer->stride, x, y, w, h); @@ -457,15 +480,16 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) * Write the rasterizer's tiles to the framebuffer. */ static void -lp_rast_end_tile( struct lp_rasterizer *rast ) +lp_rast_end_tile( struct lp_rasterizer *rast, + unsigned thread_index ) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); if (rast->state.write_color) - lp_rast_store_color(rast); + lp_rast_store_color(rast, thread_index); if (rast->state.write_zstencil) - lp_rast_store_zstencil(rast); + lp_rast_store_zstencil(rast, thread_index); } @@ -476,6 +500,7 @@ lp_rast_end_tile( struct lp_rasterizer *rast ) */ static void rasterize_bin( struct lp_rasterizer *rast, + unsigned thread_index, const struct cmd_bin *bin, int x, int y) { @@ -483,16 +508,16 @@ rasterize_bin( struct lp_rasterizer *rast, struct cmd_block *block; unsigned k; - lp_rast_start_tile( rast, x, y ); + lp_rast_start_tile( rast, thread_index, x, y ); /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, block->arg[k] ); + block->cmd[k]( rast, 0, block->arg[k] ); } } - lp_rast_end_tile( rast ); + lp_rast_end_tile( rast, thread_index ); } @@ -522,7 +547,7 @@ 
lp_rasterize_bins( struct lp_rasterizer *rast, for (i = 0; i < bins->tiles_x; i++) { for (j = 0; j < bins->tiles_y; j++) { struct cmd_bin *bin = lp_get_bin(bins, i, j); - rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); + rasterize_bin( rast, 0, bin, i * TILE_SIZE, j * TILE_SIZE ); } } } @@ -534,7 +559,7 @@ lp_rasterize_bins( struct lp_rasterizer *rast, lp_bin_iter_begin( bins ); while ((bin = lp_bin_iter_next(bins, &x, &y))) { - rasterize_bin( rast, bin, x * TILE_SIZE, y * TILE_SIZE); + rasterize_bin( rast, 0, bin, x * TILE_SIZE, y * TILE_SIZE); } } #endif @@ -550,10 +575,16 @@ lp_rasterize_bins( struct lp_rasterizer *rast, */ void lp_rast_destroy( struct lp_rasterizer *rast ) { + unsigned i; + pipe_surface_reference(&rast->state.cbuf, NULL); pipe_surface_reference(&rast->state.zsbuf, NULL); - align_free(rast->tile.depth); - align_free(rast->tile.color); + + for (i = 0; i < Elements(rast->tasks); i++) { + align_free(rast->tasks[i].tile.depth); + align_free(rast->tasks[i].tile.color); + } + FREE(rast); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e77c77b7762..25e7f8e0086 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -193,24 +193,31 @@ lp_rast_arg_null( void ) */ void lp_rast_clear_color( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_clear_zstencil( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_load_color( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_load_zstencil( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_set_state( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_triangle( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer *, + unsigned 
thread_index, const union lp_rast_cmd_arg ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 98111edff72..9e7cbd7912e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -30,6 +30,10 @@ #include "lp_rast.h" + +#define MAX_THREADS 4 /* XXX probably temporary here */ + + struct pipe_transfer; struct pipe_screen; @@ -47,14 +51,34 @@ struct lp_rast_tile /** - * This is the state required while rasterizing a tile. - * The tile size is TILE_SIZE x TILE_SIZE pixels. + * Per-thread rasterization state */ -struct lp_rasterizer +struct lp_rasterizer_task { struct lp_rast_tile tile; /** Tile color/z/stencil memory */ unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ + + /* Pixel blocks produced during rasterization + */ + unsigned nr_blocks; + struct { + unsigned x; + unsigned y; + unsigned mask; + } blocks[256]; + + const struct lp_rast_state *current_state; +}; + + +/** + * This is the state required while rasterizing tiles. + * Note that this contains per-thread information too. + * The tile size is TILE_SIZE x TILE_SIZE pixels. 
+ */ +struct lp_rasterizer +{ unsigned width, height; /**< Size of framebuffer, in pixels */ boolean clipped_tile; @@ -78,20 +102,13 @@ struct lp_rasterizer char clear_stencil; } state; - /* Pixel blocks produced during rasterization - */ - unsigned nr_blocks; - struct { - unsigned x; - unsigned y; - unsigned mask; - } blocks[256]; - - const struct lp_rast_state *current_state; + /** A task object for each rasterization thread */ + struct lp_rasterizer_task tasks[MAX_THREADS]; }; void lp_rast_shade_quads( struct lp_rasterizer *rast, + unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, unsigned masks); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 81a9c1c1422..6c96010c52f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -40,15 +40,15 @@ * All pixels are known to be inside the triangle's bounds. */ static void -block_full_4( struct lp_rasterizer *rast, int x, int y ) +block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) { - const unsigned i = rast->nr_blocks; + const unsigned i = rast_task->nr_blocks; assert(x % 4 == 0); assert(y % 4 == 0); - rast->blocks[i].x = x; - rast->blocks[i].y = y; - rast->blocks[i].mask = ~0; - rast->nr_blocks++; + rast_task->blocks[i].x = x; + rast_task->blocks[i].y = y; + rast_task->blocks[i].mask = ~0; + rast_task->nr_blocks++; } @@ -57,14 +57,14 @@ block_full_4( struct lp_rasterizer *rast, int x, int y ) * All pixels are known to be inside the triangle's bounds. 
*/ static void -block_full_16( struct lp_rasterizer *rast, int x, int y ) +block_full_16( struct lp_rasterizer_task *rast_task, int x, int y ) { unsigned ix, iy; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy += 4) for (ix = 0; ix < 16; ix += 4) - block_full_4(rast, x + ix, y + iy); + block_full_4(rast_task, x + ix, y + iy); } @@ -74,7 +74,7 @@ block_full_16( struct lp_rasterizer *rast, int x, int y ) * Generate a mask of in/out flags and add the block to the blocks list. */ static void -do_block_4( struct lp_rasterizer *rast, +do_block_4( struct lp_rasterizer_task *rast_task, const struct lp_rast_triangle *tri, int x, int y, int c1, @@ -97,11 +97,11 @@ do_block_4( struct lp_rasterizer *rast, /* As we do trivial reject already, masks should rarely be all zero: */ if (mask) { - const unsigned i = rast->nr_blocks; - rast->blocks[i].x = x; - rast->blocks[i].y = y; - rast->blocks[i].mask = mask; - rast->nr_blocks++; + const unsigned i = rast_task->nr_blocks; + rast_task->blocks[i].x = x; + rast_task->blocks[i].y = y; + rast_task->blocks[i].mask = mask; + rast_task->nr_blocks++; } } @@ -111,7 +111,7 @@ do_block_4( struct lp_rasterizer *rast, * of the triangle's bounds. 
*/ static void -do_block_16( struct lp_rasterizer *rast, +do_block_16( struct lp_rasterizer_task *rast_task, const struct lp_rast_triangle *tri, int x, int y, int c1, @@ -146,11 +146,11 @@ do_block_16( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_4(rast, x+ix, y+iy); + block_full_4(rast_task, x+ix, y+iy); } else { /* the block is partially in/out of the triangle */ - do_block_4(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + do_block_4(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); } } } @@ -163,12 +163,14 @@ do_block_16( struct lp_rasterizer *rast, */ void lp_rast_triangle( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { + struct lp_rasterizer_task *rast_task = &rast->tasks[thread_index]; const struct lp_rast_triangle *tri = arg.triangle; - int x = rast->x; - int y = rast->y; + int x = rast_task->x; + int y = rast_task->y; int ix, iy; unsigned i = 0; @@ -184,11 +186,11 @@ lp_rast_triangle( struct lp_rasterizer *rast, int eo2 = tri->eo2 * 16; int eo3 = tri->eo3 * 16; - assert(Elements(rast->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); + assert(Elements(rast_task->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); - rast->nr_blocks = 0; + rast_task->nr_blocks = 0; /* Walk over the tile to build a list of 4x4 pixel blocks which will * be filled/shaded. 
We do this at two granularities: 16x16 blocks @@ -209,21 +211,23 @@ lp_rast_triangle( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_16(rast, x+ix, y+iy); + block_full_16(rast_task, x+ix, y+iy); } else { /* the block is partially in/out of the triangle */ - do_block_16(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + do_block_16(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); } } } - assert(rast->nr_blocks <= Elements(rast->blocks)); + assert(rast_task->nr_blocks <= Elements(rast_task->blocks)); /* Shade the 4x4 pixel blocks */ - for (i = 0; i < rast->nr_blocks; i++) - lp_rast_shade_quads(rast, &tri->inputs, - rast->blocks[i].x, - rast->blocks[i].y, - rast->blocks[i].mask); + for (i = 0; i < rast_task->nr_blocks; i++) + lp_rast_shade_quads(rast, + thread_index, + &tri->inputs, + rast_task->blocks[i].x, + rast_task->blocks[i].y, + rast_task->blocks[i].mask); } -- cgit v1.2.3 From 87c9ceaea2138e051c48cd8c0fbf5f6658100779 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 17:58:46 -0700 Subject: gallium: added pipe_semaphore and related code --- src/gallium/include/pipe/p_thread.h | 50 +++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'src') diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index 25e41482325..45c35a87d0e 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -207,6 +207,56 @@ typedef unsigned pipe_condvar; #endif /* PIPE_OS_? 
*/ +/* + * Semaphores + */ + +typedef struct +{ + pipe_mutex mutex; + pipe_condvar cond; + int counter; +} pipe_semaphore; + + +static INLINE void +pipe_semaphore_init(pipe_semaphore *sema, int init_val) +{ + pipe_mutex_init(sema->mutex); + pipe_condvar_init(sema->cond); + sema->counter = init_val; +} + +static INLINE void +pipe_semaphore_destroy(pipe_semaphore *sema) +{ + pipe_mutex_destroy(sema->mutex); + pipe_condvar_destroy(sema->cond); +} + +/** Signal/increment semaphore counter */ +static INLINE void +pipe_semaphore_signal(pipe_semaphore *sema) +{ + pipe_mutex_lock(sema->mutex); + sema->counter++; + pipe_condvar_signal(sema->cond); + pipe_mutex_unlock(sema->mutex); +} + +/** Wait for semaphore counter to be greater than zero */ +static INLINE void +pipe_semaphore_wait(pipe_semaphore *sema) +{ + pipe_mutex_lock(sema->mutex); + while (sema->counter <= 0) { + pipe_condvar_wait(sema->cond, sema->mutex); + } + sema->counter--; + pipe_mutex_unlock(sema->mutex); +} + + /* * Thread-specific data. -- cgit v1.2.3 From aab1ceceecbd6449eebce7f5f5b356b1a51552e7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 18:01:12 -0700 Subject: llvmpipe: implement threaded rasterization The LP_NUM_THREADS env var controls how many threads are created. The default (and max) is 4, for now. If LP_NUM_THREADS = 0, threading is not used. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 195 ++++++++++++++++++++++------ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 18 +++ 2 files changed, 170 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 37cc28e938e..99f7108b423 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "util/u_memory.h" +#include "util/u_math.h" #include "lp_debug.h" #include "lp_state.h" @@ -36,25 +37,6 @@ #include "lp_bin.h" -struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) -{ - struct lp_rasterizer *rast; - unsigned i; - - rast = CALLOC_STRUCT(lp_rasterizer); - if(!rast) - return NULL; - - rast->screen = screen; - - for (i = 0; i < Elements(rast->tasks); i++) { - rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); - rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); - } - - return rast; -} - /** * Begin the rasterization phase. 
@@ -414,16 +396,25 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, { const unsigned x = rast->tasks[thread_index].x; const unsigned y = rast->tasks[thread_index].y; - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; + int w = TILE_SIZE; + int h = TILE_SIZE; if (x + w > rast->width) w -= x + w - rast->width; - if (y + h > rast->height) - h -= y + h - rast->height; + if (y + h > rast->height) { + int h2; + h2 = h - (y + h - rast->height); + assert(h2 <= TILE_SIZE); + h = h2; + } + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); - LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, + thread_index, x, y, w, h); lp_tile_write_4ub(rast->cbuf_transfer->format, rast->tasks[thread_index].tile.color, @@ -513,7 +504,7 @@ rasterize_bin( struct lp_rasterizer *rast, /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, 0, block->arg[k] ); + block->cmd[k]( rast, thread_index, block->arg[k] ); } } @@ -524,6 +515,41 @@ rasterize_bin( struct lp_rasterizer *rast, /** * Rasterize/execute all bins. */ +static void +rasterize_bins( struct lp_rasterizer *rast, + unsigned thread_index, + struct lp_bins *bins, + const struct pipe_framebuffer_state *fb, + bool write_depth ) +{ + /* loop over tile bins, rasterize each */ +#if 0 + { + unsigned i, j; + for (i = 0; i < bins->tiles_x; i++) { + for (j = 0; j < bins->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(bins, i, j); + rasterize_bin( rast, thread_index, + bin, i * TILE_SIZE, j * TILE_SIZE ); + } + } + } +#else + { + struct cmd_bin *bin; + int x, y; + + while ((bin = lp_bin_iter_next(bins, &x, &y))) { + rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + } + } +#endif +} + + +/** + * Called by rasterizer when it has something for us to render. 
+ */ void lp_rasterize_bins( struct lp_rasterizer *rast, struct lp_bins *bins, @@ -539,30 +565,32 @@ lp_rasterize_bins( struct lp_rasterizer *rast, fb->zsbuf != NULL && write_depth, fb->width, fb->height ); - - /* loop over tile bins, rasterize each */ -#if 0 - { - unsigned i, j; - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - rasterize_bin( rast, 0, bin, i * TILE_SIZE, j * TILE_SIZE ); - } - } + + if (rast->num_threads == 0) { + /* no threading */ + lp_bin_iter_begin( bins ); + rasterize_bins( rast, 0, bins, fb, write_depth ); } -#else - { - struct cmd_bin *bin; - int x, y; + else { + /* threaded rendering! */ + unsigned i; + + rast->bins = bins; + rast->fb = fb; + rast->write_depth = write_depth; lp_bin_iter_begin( bins ); - while ((bin = lp_bin_iter_next(bins, &x, &y))) { - rasterize_bin( rast, 0, bin, x * TILE_SIZE, y * TILE_SIZE); + /* signal the threads that there's work to do */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_signal(&rast->tasks[i].work_ready); + } + + /* wait for work to complete */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_wait(&rast->tasks[i].work_done); } } -#endif lp_rast_end( rast ); @@ -570,6 +598,87 @@ lp_rasterize_bins( struct lp_rasterizer *rast, } +/** + * This is the thread's main entrypoint. + * It's a simple loop: + * 1. wait for work + * 2. do work + * 3. 
signal that we're done + */ +static void * +thread_func( void *init_data ) +{ + struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; + struct lp_rasterizer *rast = task->rast; + int debug = 0; + + while (1) { + /* wait for work */ + if (debug) + debug_printf("thread %d waiting for work\n", task->thread_index); + pipe_semaphore_wait(&task->work_ready); + + /* do work */ + if (debug) + debug_printf("thread %d doing work\n", task->thread_index); + rasterize_bins(rast, task->thread_index, + rast->bins, rast->fb, rast->write_depth); + + /* signal done with work */ + if (debug) + debug_printf("thread %d done working\n", task->thread_index); + pipe_semaphore_signal(&task->work_done); + } + + return NULL; +} + + +/** + * Initialize semaphores and spawn the threads. + */ +static void +create_rast_threads(struct lp_rasterizer *rast) +{ + unsigned i; + + rast->num_threads = debug_get_num_option("LP_NUM_THREADS", MAX_THREADS); + rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); + + /* NOTE: if num_threads is zero, we won't use any threads */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_init(&rast->tasks[i].work_ready, 0); + pipe_semaphore_init(&rast->tasks[i].work_done, 0); + rast->threads[i] = pipe_thread_create(thread_func, + (void *) &rast->tasks[i]); + } +} + + + +struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) +{ + struct lp_rasterizer *rast; + unsigned i; + + rast = CALLOC_STRUCT(lp_rasterizer); + if(!rast) + return NULL; + + rast->screen = screen; + + for (i = 0; i < Elements(rast->tasks); i++) { + rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].rast = rast; + rast->tasks[i].thread_index = i; + } + + create_rast_threads(rast); + + return rast; +} + /* Shutdown: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 9e7cbd7912e..62f3c877da5 
100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -28,6 +28,7 @@ #ifndef LP_RAST_PRIV_H #define LP_RAST_PRIV_H +#include "pipe/p_thread.h" #include "lp_rast.h" @@ -36,6 +37,7 @@ struct pipe_transfer; struct pipe_screen; +struct lp_rasterizer; /** @@ -69,6 +71,15 @@ struct lp_rasterizer_task } blocks[256]; const struct lp_rast_state *current_state; + + /** "back" pointer */ + struct lp_rasterizer *rast; + + /** "my" index */ + unsigned thread_index; + + pipe_semaphore work_ready; + pipe_semaphore work_done; }; @@ -104,6 +115,13 @@ struct lp_rasterizer /** A task object for each rasterization thread */ struct lp_rasterizer_task tasks[MAX_THREADS]; + + unsigned num_threads; + pipe_thread threads[MAX_THREADS]; + + struct lp_bins *bins; + const struct pipe_framebuffer_state *fb; + boolean write_depth; }; -- cgit v1.2.3 From 73e13c33fd0a9b8574d00d01d301b9d4f88d4051 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 18:18:37 -0700 Subject: llvmpipe: remove some left-over debug code --- src/gallium/drivers/llvmpipe/lp_rast.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 99f7108b423..01685b79d85 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -402,12 +402,9 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, if (x + w > rast->width) w -= x + w - rast->width; - if (y + h > rast->height) { - int h2; - h2 = h - (y + h - rast->height); - assert(h2 <= TILE_SIZE); - h = h2; - } + if (y + h > rast->height) + h -= y + h - rast->height; + assert(w >= 0); assert(h >= 0); assert(w <= TILE_SIZE); -- cgit v1.2.3 From 7f457acabcbeea6a27b4f375f55e318fff52445f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 8 Dec 2009 08:02:49 +0000 Subject: llvmpipe: Use number of CPUs as default number of threads. 
Also bump MAX_THREADS to 8. --- src/gallium/drivers/llvmpipe/lp_rast.c | 4 +++- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 01685b79d85..7cd046cc390 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "lp_debug.h" #include "lp_state.h" @@ -639,7 +640,8 @@ create_rast_threads(struct lp_rasterizer *rast) { unsigned i; - rast->num_threads = debug_get_num_option("LP_NUM_THREADS", MAX_THREADS); + rast->num_threads = util_cpu_caps.nr_cpus; + rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads); rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); /* NOTE: if num_threads is zero, we won't use any threads */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 62f3c877da5..5502419a928 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -32,7 +32,7 @@ #include "lp_rast.h" -#define MAX_THREADS 4 /* XXX probably temporary here */ +#define MAX_THREADS 8 /* XXX probably temporary here */ struct pipe_transfer; -- cgit v1.2.3 From 88e62b33dc5ed4a4ab0c668e627c7e85991c74a1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 12:22:12 -0700 Subject: llvmpipe: more bin functions for create/destroy/queries --- src/gallium/drivers/llvmpipe/lp_bin.c | 47 +++++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bin.h | 9 +++++++ 2 files changed, 56 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c index 3e294e57994..f2d3c2df4d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ 
-29,6 +29,25 @@ #include "lp_bin.h" +struct lp_bins * +lp_bins_create(void) +{ + struct lp_bins *bins = CALLOC_STRUCT(lp_bins); + if (bins) + lp_init_bins(bins); + return bins; +} + + +void +lp_bins_destroy(struct lp_bins *bins) +{ + lp_reset_bins(bins); + lp_free_bin_data(bins); + FREE(bins); +} + + void lp_init_bins(struct lp_bins *bins) { @@ -147,6 +166,34 @@ lp_bin_new_data_block( struct data_block_list *list ) } +/** Return number of bytes used for bin data */ +unsigned +lp_bin_data_size( const struct lp_bins *bins ) +{ + unsigned size = 0; + const struct data_block *block; + for (block = bins->data.head; block; block = block->next) { + size += block->used; + } + return size; +} + + +/** Return number of bytes used for a tile bin */ +unsigned +lp_bin_cmd_size( const struct lp_bins *bins, unsigned x, unsigned y ) +{ + struct cmd_bin *bin = lp_get_bin((struct lp_bins *) bins, x, y); + const struct cmd_block *cmd; + unsigned size = 0; + for (cmd = bin->commands.head; cmd; cmd = cmd->next) { + size += (cmd->count * + (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg))); + } + return size; +} + + /** * Return last command in the bin */ diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index b07ff64e623..c49b0264d61 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -120,6 +120,11 @@ struct lp_bins { +struct lp_bins *lp_bins_create(void); + +void lp_bins_destroy(struct lp_bins *bins); + + void lp_init_bins(struct lp_bins *bins); void lp_reset_bins(struct lp_bins *bins ); @@ -134,6 +139,10 @@ void lp_bin_new_data_block( struct data_block_list *list ); void lp_bin_new_cmd_block( struct cmd_block_list *list ); +unsigned lp_bin_data_size( const struct lp_bins *bins ); + +unsigned lp_bin_cmd_size( const struct lp_bins *bins, unsigned x, unsigned y ); + /** * Allocate space for a command/data in the bin's data buffer. 
-- cgit v1.2.3 From 22b07b8be4c2939b00e10f17fa91e68682808594 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 12:28:54 -0700 Subject: llvmpipe: use new lp_setup_get_current_bins() function This stub function will interface to the queue system... --- src/gallium/drivers/llvmpipe/lp_setup.c | 46 +++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 3 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 ++++++----- 3 files changed, 44 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 4935d5b5407..484a609e6eb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -47,6 +47,13 @@ static void set_state( struct setup_context *, unsigned ); +struct lp_bins * +lp_setup_get_current_bins(struct setup_context *setup) +{ + /* XXX eventually get bin from queue */ + return setup->bins; +} + static void first_triangle( struct setup_context *setup, @@ -88,7 +95,7 @@ static void reset_context( struct setup_context *setup ) setup->fs.stored = NULL; setup->dirty = ~0; - lp_reset_bins( &setup->bins ); + lp_reset_bins( setup->bins ); /* Reset some state: */ @@ -108,8 +115,10 @@ static void rasterize_bins( struct setup_context *setup, boolean write_depth ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); + lp_rasterize_bins(setup->rast, - &setup->bins, + bins, setup->fb, write_depth); @@ -123,26 +132,28 @@ rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); if (setup->fb->cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_clear_color, setup->clear.color ); else - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_load_color, lp_rast_arg_null() ); } if 
(setup->fb->zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_clear_zstencil, setup->clear.zstencil ); else - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_load_zstencil, lp_rast_arg_null() ); } @@ -215,6 +226,7 @@ void lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); unsigned tiles_x, tiles_y; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -226,7 +238,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; - lp_bin_set_num_bins(&setup->bins, tiles_x, tiles_y); + lp_bin_set_num_bins(bins, tiles_x, tiles_y); } @@ -237,6 +249,7 @@ lp_setup_clear( struct setup_context *setup, unsigned stencil, unsigned flags ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); unsigned i; LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); @@ -261,12 +274,12 @@ lp_setup_clear( struct setup_context *setup, * don't see that as being a common usage. 
*/ if (flags & PIPE_CLEAR_COLOR) - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_clear_color, setup->clear.color ); if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - lp_bin_everywhere( &setup->bins, + lp_bin_everywhere( bins, lp_rast_clear_zstencil, setup->clear.zstencil ); } @@ -407,6 +420,8 @@ lp_setup_is_texture_referenced( struct setup_context *setup, static INLINE void lp_setup_update_shader_state( struct setup_context *setup ) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); assert(setup->fs.current.jit_function); @@ -415,7 +430,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) uint8_t *stored; unsigned i, j; - stored = lp_bin_alloc_aligned(&setup->bins, 4 * 16, 16); + stored = lp_bin_alloc_aligned(bins, 4 * 16, 16); /* smear each blend color component across 16 ubyte elements */ for (i = 0; i < 4; ++i) { @@ -447,7 +462,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) current_size) != 0) { void *stored; - stored = lp_bin_alloc(&setup->bins, current_size); + stored = lp_bin_alloc(bins, current_size); if(stored) { memcpy(stored, current_data, @@ -477,7 +492,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) * and append it to the bin's setup data buffer. 
*/ struct lp_rast_state *stored = - (struct lp_rast_state *) lp_bin_alloc(&setup->bins, sizeof *stored); + (struct lp_rast_state *) lp_bin_alloc(bins, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, @@ -485,7 +500,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) setup->fs.stored = stored; /* put the state-set command into all bins */ - lp_bin_state_command( &setup->bins, + lp_bin_state_command( bins, lp_rast_set_state, lp_rast_arg_state(setup->fs.stored) ); } @@ -537,9 +552,10 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); - lp_free_bin_data(&setup->bins); + lp_bins_destroy(setup->bins); lp_rast_destroy( setup->rast ); + FREE( setup ); } @@ -557,7 +573,7 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup->rast) goto fail; - lp_init_bins(&setup->bins); + setup->bins = lp_bins_create(); setup->triangle = first_triangle; setup->line = first_line; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 9b47b595c62..782c05122ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -55,7 +55,7 @@ struct setup_context { struct lp_rasterizer *rast; - struct lp_bins bins; + struct lp_bins *bins; boolean ccw_is_frontface; unsigned cullmode; @@ -113,5 +113,6 @@ void lp_setup_choose_triangle( struct setup_context *setup ); void lp_setup_choose_line( struct setup_context *setup ); void lp_setup_choose_point( struct setup_context *setup ); +struct lp_bins *lp_setup_get_current_bins(struct setup_context *setup); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index b8f79849e8b..80617120b1c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -178,6 +178,7 @@ static void setup_tri_coefficients( struct setup_context *setup, const float 
(*v3)[4], boolean frontface) { + struct lp_bins *bins = lp_setup_get_current_bins(setup); unsigned slot; /* Allocate space for the a0, dadx and dady arrays @@ -185,9 +186,9 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned bytes; bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); - tri->inputs.dadx = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); - tri->inputs.dady = lp_bin_alloc_aligned( &setup->bins, bytes, 16 ); + tri->inputs.a0 = lp_bin_alloc_aligned( bins, bytes, 16 ); + tri->inputs.dadx = lp_bin_alloc_aligned( bins, bytes, 16 ); + tri->inputs.dady = lp_bin_alloc_aligned( bins, bytes, 16 ); } /* The internal position input is in slot zero: @@ -263,7 +264,8 @@ do_triangle_ccw(struct setup_context *setup, const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - struct lp_rast_triangle *tri = lp_bin_alloc( &setup->bins, sizeof *tri ); + struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_rast_triangle *tri = lp_bin_alloc( bins, sizeof *tri ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -283,7 +285,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject to overflow?? 
*/ if (area <= 0) { - lp_bin_putback_data( &setup->bins, sizeof *tri ); + lp_bin_putback_data( bins, sizeof *tri ); return; } @@ -295,7 +297,7 @@ do_triangle_ccw(struct setup_context *setup, if (tri->miny == tri->maxy || tri->minx == tri->maxx) { - lp_bin_putback_data( &setup->bins, sizeof *tri ); + lp_bin_putback_data( bins, sizeof *tri ); return; } @@ -405,7 +407,7 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - lp_bin_command( &setup->bins, minx, miny, lp_rast_triangle, + lp_bin_command( bins, minx, miny, lp_rast_triangle, lp_rast_arg_triangle(tri) ); } else @@ -464,7 +466,7 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ - lp_bin_command( &setup->bins, x, y, + lp_bin_command( bins, x, y, lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); } @@ -472,7 +474,7 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* shade partial tile */ - lp_bin_command( &setup->bins, x, y, + lp_bin_command( bins, x, y, lp_rast_triangle, lp_rast_arg_triangle(tri) ); } -- cgit v1.2.3 From ea35993e7479793212529b1db081c84aa71ea4cc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 14:36:52 -0700 Subject: llvmpipe: added new lp_bin_queue.[ch] files The queues will be used for keeping track of full and empty bins so we can overlap setup with the rasterization threads. 
--- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_bin_queue.c | 156 ++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bin_queue.h | 55 ++++++++++ 4 files changed, 213 insertions(+) create mode 100644 src/gallium/drivers/llvmpipe/lp_bin_queue.c create mode 100644 src/gallium/drivers/llvmpipe/lp_bin_queue.h (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 6ff45d0f058..4cc4c88ffd2 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -7,6 +7,7 @@ CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ lp_bin.c \ + lp_bin_queue.c \ lp_bld_alpha.c \ lp_bld_arit.c \ lp_bld_blend_aos.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 4aef3387353..19ef6861672 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -20,6 +20,7 @@ llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ 'lp_bin.c', + 'lp_bin_queue.c', 'lp_bld_alpha.c', 'lp_bld_arit.c', 'lp_bld_blend_aos.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c new file mode 100644 index 00000000000..19e1a5827b7 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.c @@ -0,0 +1,156 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Bin queue. We'll use two queues. One contains "full" bins which + * are produced by the "setup" code. The other contains "empty" bins + * which are produced by the "rast" code when it finishes rendering a bin. + */ + + +#include "pipe/p_thread.h" +#include "lp_bin.h" +#include "lp_bin_queue.h" + + + +#define MAX_BINS 4 + + +/** + * A queue of bins + */ +struct lp_bins_queue +{ + /** XXX might use a linked list here somedone, but the list will + * probably always be pretty short. 
+ */ + struct lp_bins *bins[MAX_BINS]; + unsigned size; + + pipe_condvar size_change; + pipe_mutex mutex; +}; + + + +/** Allocate a new bins queue */ +struct lp_bins_queue * +lp_bins_queue_create(void) +{ + struct lp_bins_queue *queue = CALLOC_STRUCT(lp_bins_queue); + if (queue) { + pipe_condvar_init(queue->size_change); + pipe_mutex_init(queue->mutex); + } + return queue; +} + + +/** Delete a bins queue */ +void +lp_bins_queue_destroy(struct lp_bins_queue *queue) +{ + pipe_condvar_destroy(queue->size_change); + pipe_mutex_destroy(queue->mutex); +} + + +/** Remove first lp_bins from head of queue */ +struct lp_bins * +lp_bins_dequeue(struct lp_bins_queue *queue) +{ + struct lp_bins *bins; + unsigned i; + + pipe_mutex_lock(queue->mutex); + while (queue->size == 0) { + pipe_condvar_wait(queue->size_change, queue->mutex); + } + + assert(queue->size >= 1); + + /* get head */ + bins = queue->bins[0]; + + /* shift entries */ + for (i = 0; i < queue->size - 1; i++) { + queue->bins[i] = queue->bins[i + 1]; + } + + queue->size--; + + /* signal size change */ + pipe_condvar_signal(queue->size_change); + + pipe_mutex_unlock(queue->mutex); + + return bins; +} + + +/** Add an lp_bins to tail of queue */ +void +lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins) +{ + pipe_mutex_lock(queue->mutex); + + assert(queue->size < MAX_BINS); + + /* add to end */ + queue->bins[queue->size++] = bins; + + /* signal size change */ + pipe_condvar_signal(queue->size_change); + + pipe_mutex_unlock(queue->mutex); +} + + +/** Return number of entries in the queue */ +unsigned +lp_bins_queue_size(struct lp_bins_queue *queue) +{ + unsigned sz; + pipe_mutex_lock(queue->mutex); + sz = queue->size; + pipe_mutex_unlock(queue->mutex); + return sz; +} + + +/** Wait until the queue has 'size' entries */ +void +lp_bins_queue_wait_size(struct lp_bins_queue *queue, unsigned size) +{ + pipe_mutex_lock(queue->mutex); + while (queue->size != size) { + pipe_condvar_wait(queue->size_change, 
queue->mutex); + } + pipe_mutex_unlock(queue->mutex); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.h b/src/gallium/drivers/llvmpipe/lp_bin_queue.h new file mode 100644 index 00000000000..8946a541585 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_BIN_QUEUE +#define LP_BIN_QUEUE + +struct lp_bin_queue; +struct lp_bins; + + +struct lp_bins_queue * +lp_bins_queue_create(void); + +void +lp_bins_queue_destroy(struct lp_bins_queue *queue); + +struct lp_bins * +lp_bins_dequeue(struct lp_bins_queue *queue); + +void +lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins); + +unsigned +lp_bins_queue_size(struct lp_bins_queue *queue); + +void +lp_bins_queue_wait_size(struct lp_bins_queue *queue, unsigned size); + + +#endif /* LP_BIN_QUEUE */ -- cgit v1.2.3 From d7dbc666367438ee9efe748505907b36bba6b66a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 14:53:33 -0700 Subject: llvmpipe: checkpoint: begin plugging in bin queue code --- src/gallium/drivers/llvmpipe/lp_rast.c | 12 ++++++++- src/gallium/drivers/llvmpipe/lp_rast.h | 4 ++- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 4 +++ src/gallium/drivers/llvmpipe/lp_setup.c | 33 ++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 7 +++++- 5 files changed, 56 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 7cd046cc390..0471ad7e2f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -29,6 +29,7 @@ #include "util/u_math.h" #include "util/u_cpu_detect.h" +#include "lp_bin_queue.h" #include "lp_debug.h" #include "lp_state.h" #include "lp_rast.h" @@ -655,7 +656,13 @@ create_rast_threads(struct lp_rasterizer *rast) -struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) +/** + * Create new lp_rasterizer. + * \param empty the queue to put empty bins on after we've finished + * processing them. 
+ */ +struct lp_rasterizer * +lp_rast_create( struct pipe_screen *screen, struct lp_bins_queue *empty ) { struct lp_rasterizer *rast; unsigned i; @@ -666,6 +673,9 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) rast->screen = screen; + rast->empty_bins = empty; + rast->full_bins = lp_bins_queue_create(); + for (i = 0; i < Elements(rast->tasks); i++) { rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 25e7f8e0086..0000fbc5c71 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -48,6 +48,7 @@ */ struct lp_rasterizer; struct lp_bins; +struct lp_bins_queue; struct cmd_bin; struct pipe_screen; @@ -130,7 +131,8 @@ struct lp_rast_triangle { -struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ); +struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, + struct lp_bins_queue *empty ); void lp_rast_destroy( struct lp_rasterizer * ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 5502419a928..4e4f8b36a7a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -95,6 +95,10 @@ struct lp_rasterizer boolean clipped_tile; boolean check_for_clipped_tiles; + struct lp_bins_queue *full_bins; + struct lp_bins_queue *empty_bins; + pipe_mutex get_bin_mutex; + /* Framebuffer stuff */ struct pipe_screen *screen; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 484a609e6eb..c8cdc328533 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -37,6 +37,8 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "lp_bin.h" +#include "lp_bin_queue.h" #include 
"lp_debug.h" #include "lp_state.h" #include "lp_buffer.h" @@ -44,6 +46,10 @@ #include "lp_setup_context.h" +/** XXX temporary value, temporary here */ +#define MAX_BINS 2 + + static void set_state( struct setup_context *, unsigned ); @@ -554,6 +560,14 @@ lp_setup_destroy( struct setup_context *setup ) lp_bins_destroy(setup->bins); + /* free the bins in the 'empty' queue */ + while (lp_bins_queue_size(setup->empty_bins) > 0) { + struct lp_bins *bins = lp_bins_dequeue(setup->empty_bins); + if (!bins) + break; + lp_bins_destroy(bins); + } + lp_rast_destroy( setup->rast ); FREE( setup ); @@ -567,14 +581,28 @@ lp_setup_destroy( struct setup_context *setup ) struct setup_context * lp_setup_create( struct pipe_screen *screen ) { + unsigned i; struct setup_context *setup = CALLOC_STRUCT(setup_context); - setup->rast = lp_rast_create( screen ); + if (!setup) + return NULL; + + setup->empty_bins = lp_bins_queue_create(); + if (!setup->empty_bins) + goto fail; + + setup->rast = lp_rast_create( screen, setup->empty_bins ); if (!setup->rast) goto fail; setup->bins = lp_bins_create(); + /* create some empty bins */ + for (i = 0; i < MAX_BINS; i++) { + struct lp_bins *bins = lp_bins_create(); + lp_bins_enqueue(setup->empty_bins, bins); + } + setup->triangle = first_triangle; setup->line = first_line; setup->point = first_point; @@ -584,6 +612,9 @@ lp_setup_create( struct pipe_screen *screen ) return setup; fail: + if (setup->empty_bins) + lp_bins_queue_destroy(setup->empty_bins); + FREE(setup); return NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 782c05122ca..584e37665bc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -46,6 +46,9 @@ #define LP_SETUP_NEW_BLEND_COLOR 0x04 +struct lp_bins_queue; + + /** * Point/line/triangle setup context. 
* Note: "stored" below indicates data which is stored in the bins, @@ -55,7 +58,9 @@ struct setup_context { struct lp_rasterizer *rast; - struct lp_bins *bins; + + struct lp_bins *bins; /**< current bins */ + struct lp_bins_queue *empty_bins; /**< queue of empty bins */ boolean ccw_is_frontface; unsigned cullmode; -- cgit v1.2.3 From 3bee8c2e7c17893f91f6b62e2db090ef495dca9d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 16:02:30 -0700 Subject: llvmpipe: use the empty_bins queue now --- src/gallium/drivers/llvmpipe/lp_rast.c | 8 ++++++++ src/gallium/drivers/llvmpipe/lp_setup.c | 19 +++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 0471ad7e2f0..3165128f8f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -569,6 +569,10 @@ lp_rasterize_bins( struct lp_rasterizer *rast, /* no threading */ lp_bin_iter_begin( bins ); rasterize_bins( rast, 0, bins, fb, write_depth ); + + /* reset bins and put into the empty queue */ + lp_reset_bins( bins ); + lp_bins_enqueue( rast->empty_bins, bins); } else { /* threaded rendering! 
*/ @@ -589,6 +593,10 @@ lp_rasterize_bins( struct lp_rasterizer *rast, for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_wait(&rast->tasks[i].work_done); } + + /* reset bins and put into the empty queue */ + lp_reset_bins( bins ); + lp_bins_enqueue( rast->empty_bins, bins); } lp_rast_end( rast ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index c8cdc328533..889f92a0d50 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -56,7 +56,17 @@ static void set_state( struct setup_context *, unsigned ); struct lp_bins * lp_setup_get_current_bins(struct setup_context *setup) { - /* XXX eventually get bin from queue */ + if (!setup->bins) { + /* wait for a free/empty bin */ + setup->bins = lp_bins_dequeue(setup->empty_bins); + if(0)lp_reset_bins( setup->bins ); /* XXX temporary? */ + + if (setup->fb) { + unsigned tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; + unsigned tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; + lp_bin_set_num_bins(setup->bins, tiles_x, tiles_y); + } + } return setup->bins; } @@ -101,7 +111,8 @@ static void reset_context( struct setup_context *setup ) setup->fs.stored = NULL; setup->dirty = ~0; - lp_reset_bins( setup->bins ); + /* no current bin */ + setup->bins = NULL; /* Reset some state: */ @@ -558,8 +569,6 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); - lp_bins_destroy(setup->bins); - /* free the bins in the 'empty' queue */ while (lp_bins_queue_size(setup->empty_bins) > 0) { struct lp_bins *bins = lp_bins_dequeue(setup->empty_bins); @@ -595,8 +604,6 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup->rast) goto fail; - setup->bins = lp_bins_create(); - /* create some empty bins */ for (i = 0; i < MAX_BINS; i++) { struct lp_bins *bins = lp_bins_create(); -- cgit v1.2.3 From ad3c16c127f167513a136759a1700e111a0ef7b8 Mon Sep 17 00:00:00 2001 From: Brian 
Paul Date: Wed, 9 Dec 2009 16:30:05 -0700 Subject: llvmpipe: simplify the tiles_x, tiles_y code a bit --- src/gallium/drivers/llvmpipe/lp_bin.c | 10 ++++++---- src/gallium/drivers/llvmpipe/lp_bin.h | 5 ++--- src/gallium/drivers/llvmpipe/lp_setup.c | 12 +++--------- 3 files changed, 11 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c index f2d3c2df4d2..703cdd2de59 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ b/src/gallium/drivers/llvmpipe/lp_bin.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include "util/u_math.h" #include "util/u_memory.h" #include "lp_bin.h" @@ -137,13 +138,14 @@ lp_free_bin_data(struct lp_bins *bins) void -lp_bin_set_num_bins( struct lp_bins *bins, - unsigned tiles_x, unsigned tiles_y ) +lp_bin_set_framebuffer_size( struct lp_bins *bins, + unsigned width, unsigned height ) { - bins->tiles_x = tiles_x; - bins->tiles_y = tiles_y; + bins->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; + bins->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; } + void lp_bin_new_cmd_block( struct cmd_block_list *list ) { diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index c49b0264d61..4394e7bda05 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -131,9 +131,8 @@ void lp_reset_bins(struct lp_bins *bins ); void lp_free_bin_data(struct lp_bins *bins); -void -lp_bin_set_num_bins( struct lp_bins *bins, - unsigned tiles_x, unsigned tiles_y ); +void lp_bin_set_framebuffer_size( struct lp_bins *bins, + unsigned width, unsigned height ); void lp_bin_new_data_block( struct data_block_list *list ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 889f92a0d50..3ef9cdaa0ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -34,7 +34,6 
@@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" #include "lp_bin.h" @@ -62,9 +61,8 @@ lp_setup_get_current_bins(struct setup_context *setup) if(0)lp_reset_bins( setup->bins ); /* XXX temporary? */ if (setup->fb) { - unsigned tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; - unsigned tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; - lp_bin_set_num_bins(setup->bins, tiles_x, tiles_y); + lp_bin_set_framebuffer_size(setup->bins, + setup->fb->width, setup->fb->height); } } return setup->bins; @@ -244,7 +242,6 @@ lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { struct lp_bins *bins = lp_setup_get_current_bins(setup); - unsigned tiles_x, tiles_y; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -252,10 +249,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, setup->fb = fb; - tiles_x = align(setup->fb->width, TILE_SIZE) / TILE_SIZE; - tiles_y = align(setup->fb->height, TILE_SIZE) / TILE_SIZE; - - lp_bin_set_num_bins(bins, tiles_x, tiles_y); + lp_bin_set_framebuffer_size(bins, setup->fb->width, setup->fb->height); } -- cgit v1.2.3 From 96689d451a24753e088f40fb167c3cb26d8045ac Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Dec 2009 16:32:32 -0700 Subject: llvmpipe: added some debug/info code --- src/gallium/drivers/llvmpipe/lp_rast.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3165128f8f6..5659ae2ca5a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -40,6 +40,7 @@ + /** * Begin the rasterization phase. * Map the framebuffer surfaces. Initialize the 'rast' state. 
@@ -555,8 +556,22 @@ lp_rasterize_bins( struct lp_rasterizer *rast, const struct pipe_framebuffer_state *fb, bool write_depth ) { + boolean debug = false; + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + if (debug) { + unsigned x, y; + printf("rasterize bins:\n"); + printf(" data size: %u\n", lp_bin_data_size(bins)); + for (y = 0; y < bins->tiles_y; y++) { + for (x = 0; x < bins->tiles_x; x++) { + printf(" bin %u, %u size: %u\n", x, y, + lp_bin_cmd_size(bins, x, y)); + } + } + } + lp_rast_begin( rast, fb->cbufs[0], fb->zsbuf, -- cgit v1.2.3 From 21008441e4609c8590ede093a549ef689516ddd4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 10:59:46 -0700 Subject: llvmpipe: updated comments --- src/gallium/drivers/llvmpipe/lp_rast.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 0000fbc5c71..75f7b744045 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -41,17 +41,13 @@ #include "lp_jit.h" -/* Initially create and program a single rasterizer directly. Later - * will want multiple of these, one or two per core. At that stage - * will probably pass command buffers into the rasterizers rather than - * individual function calls like this. 
- */ struct lp_rasterizer; struct lp_bins; struct lp_bins_queue; struct cmd_bin; struct pipe_screen; +/** For sub-pixel positioning */ #define FIXED_ORDER 4 #define FIXED_ONE (1< Date: Thu, 10 Dec 2009 14:54:32 -0700 Subject: llvmpipe: added some bin queue debug code --- src/gallium/drivers/llvmpipe/lp_bin_queue.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c index 19e1a5827b7..0fda0b5baeb 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.c +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.c @@ -122,6 +122,14 @@ lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins) assert(queue->size < MAX_BINS); + /* debug: check that bins is not already in the queue */ + if (0) { + unsigned i; + for (i = 0; i < queue->size; i++) { + assert(queue->bins[i] != bins); + } + } + /* add to end */ queue->bins[queue->size++] = bins; -- cgit v1.2.3 From a67f39810b5c88367ae2a9ee564b1a740b27601b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 14:54:57 -0700 Subject: gallium/util: added framebuffer compare, copy util funcs --- src/gallium/auxiliary/util/u_surface.c | 49 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_surface.h | 9 +++++++ 2 files changed, 58 insertions(+) (limited to 'src') diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 85e443204e3..a95b887e847 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -36,6 +36,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" +#include "util/u_memory.h" #include "util/u_surface.h" @@ -111,3 +112,51 @@ util_destroy_rgba_surface(struct pipe_texture *texture, pipe_texture_reference(&texture, NULL); } + + +/** + * Compare pipe_framebuffer_state objects. 
+ * \return TRUE if different, FALSE if same (note: inverse of what the name suggests) + */ +boolean +util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + boolean changed = FALSE; + unsigned i; + + for (i = 0; i < Elements(src->cbufs); i++) { + if (dst->cbufs[i] != src->cbufs[i]) { + changed = TRUE; + } + } + + if (dst->nr_cbufs != src->nr_cbufs) { + changed = TRUE; + } + + if (dst->zsbuf != src->zsbuf) { + changed = TRUE; + } + + return changed; +} + + +/** + * Copy framebuffer state from src to dst, updating refcounts. + */ +void +util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + unsigned i; + + for (i = 0; i < Elements(src->cbufs); i++) { + pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); + } + + dst->nr_cbufs = src->nr_cbufs; + + pipe_surface_reference(&dst->zsbuf, src->zsbuf); +} diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h index ce84ed7ad06..a9da9aadcbb 100644 --- a/src/gallium/auxiliary/util/u_surface.h +++ b/src/gallium/auxiliary/util/u_surface.h @@ -66,4 +66,13 @@ util_destroy_rgba_surface(struct pipe_texture *texture, struct pipe_surface *surface); +extern boolean +util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src); + +extern void +util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src); + + #endif /* U_SURFACE_H */ -- cgit v1.2.3 From 6d810e5a7b082b9769a4ede4661536ae0e070dd2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 14:55:28 -0700 Subject: llvmpipe: simplify llvmpipe_set_framebuffer_state() --- src/gallium/drivers/llvmpipe/lp_state_surface.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 3eff40e3f19..0263f2a624a 
100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -28,6 +28,8 @@ /* Authors: Keith Whitwell */ +#include "pipe/p_state.h" +#include "util/u_surface.h" #include "lp_context.h" #include "lp_state.h" #include "lp_surface.h" @@ -44,27 +46,12 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct llvmpipe_context *lp = llvmpipe_context(pipe); - uint i; - boolean dirty = FALSE; - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { - pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]); - dirty = TRUE; - } - } + boolean changed = util_framebuffer_state_equal(&lp->framebuffer, fb); - if (lp->framebuffer.nr_cbufs != fb->nr_cbufs) { - dirty = TRUE; - lp->framebuffer.nr_cbufs = fb->nr_cbufs; - } + if (changed) { - /* zbuf changing? */ - if (lp->framebuffer.zsbuf != fb->zsbuf) { - dirty = TRUE; - - /* assign new */ - pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf); + util_copy_framebuffer_state(&lp->framebuffer, fb); /* Tell draw module how deep the Z/depth buffer is */ if (lp->framebuffer.zsbuf) { @@ -80,9 +67,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } draw_set_mrd(lp->draw, mrd); } - } - if (dirty) { lp_setup_bind_framebuffer( lp->setup, fb ); lp->dirty |= LP_NEW_FRAMEBUFFER; -- cgit v1.2.3 From 9a6567f1ed88727545f747e8670b713f17627c94 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 14:56:11 -0700 Subject: llvmpipe: updated comment --- src/gallium/drivers/llvmpipe/lp_setup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3ef9cdaa0ca..d976934a5dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -280,9 +280,10 @@ lp_setup_clear( struct setup_context *setup, if 
(setup->state == SETUP_ACTIVE) { /* Add the clear to existing bins. In the unusual case where - * both color and depth-stencilare being cleared, we could - * discard the currently binned scene and start again, but I - * don't see that as being a common usage. + * both color and depth-stencil are being cleared when there's + * already been some rendering, we could discard the currently + * binned scene and start again, but I don't see that as being + * a common usage. */ if (flags & PIPE_CLEAR_COLOR) lp_bin_everywhere( bins, -- cgit v1.2.3 From 9509f73c2147a9e225b5ef69a646e5dd711573f5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Dec 2009 14:56:30 -0700 Subject: llvmpipe: checkpoint: use empty/full bin queues --- src/gallium/drivers/llvmpipe/lp_rast.c | 64 +++++++++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 8 ++++ 2 files changed, 64 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 5659ae2ca5a..0cd95e0ca72 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -40,6 +40,46 @@ +/** + * Called by rasterization threads to get the next chunk of work. + * We use a lock to make sure that all the threads get the same bins. + */ +static struct lp_bins * +get_next_full_bin( struct lp_rasterizer *rast ) +{ + pipe_mutex_lock( rast->get_bin_mutex ); + if (!rast->curr_bins) { + /* this will wait until there's something in the queue */ + rast->curr_bins = lp_bins_dequeue( rast->full_bins ); + rast->release_count = 0; + + lp_bin_iter_begin( rast->curr_bins ); + } + pipe_mutex_unlock( rast->get_bin_mutex ); + return rast->curr_bins; +} + + +/** + * Called by rasterization threads after they've finished with + * the current bin. When all threads have called this, we reset + * the bin and put it into the 'empty bins' queue. 
+ */ +static void +release_current_bin( struct lp_rasterizer *rast ) +{ + pipe_mutex_lock( rast->get_bin_mutex ); + rast->release_count++; + if (rast->release_count == rast->num_threads) { + assert(rast->curr_bins); + lp_reset_bins( rast->curr_bins ); + lp_bins_enqueue( rast->empty_bins, rast->curr_bins ); + rast->curr_bins = NULL; + } + pipe_mutex_unlock( rast->get_bin_mutex ); +} + + /** * Begin the rasterization phase. @@ -488,6 +528,7 @@ lp_rast_end_tile( struct lp_rasterizer *rast, * Rasterize commands for a single bin. * \param x, y position of the bin's tile in the framebuffer * Must be called between lp_rast_begin() and lp_rast_end(). + * Called per thread. */ static void rasterize_bin( struct lp_rasterizer *rast, @@ -514,6 +555,7 @@ rasterize_bin( struct lp_rasterizer *rast, /** * Rasterize/execute all bins. + * Called per thread. */ static void rasterize_bins( struct lp_rasterizer *rast, @@ -539,6 +581,7 @@ rasterize_bins( struct lp_rasterizer *rast, struct cmd_bin *bin; int x, y; + assert(bins); while ((bin = lp_bin_iter_next(bins, &x, &y))) { rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); } @@ -593,11 +636,13 @@ lp_rasterize_bins( struct lp_rasterizer *rast, /* threaded rendering! 
*/ unsigned i; - rast->bins = bins; + lp_bins_enqueue( rast->full_bins, bins ); + + /* XXX need to move/fix these */ rast->fb = fb; rast->write_depth = write_depth; - lp_bin_iter_begin( bins ); + /*lp_bin_iter_begin( bins );*/ /* signal the threads that there's work to do */ for (i = 0; i < rast->num_threads; i++) { @@ -608,10 +653,6 @@ lp_rasterize_bins( struct lp_rasterizer *rast, for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_wait(&rast->tasks[i].work_done); } - - /* reset bins and put into the empty queue */ - lp_reset_bins( bins ); - lp_bins_enqueue( rast->empty_bins, bins); } lp_rast_end( rast ); @@ -632,19 +673,26 @@ thread_func( void *init_data ) { struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; struct lp_rasterizer *rast = task->rast; - int debug = 0; + boolean debug = false; while (1) { + struct lp_bins *bins; + /* wait for work */ if (debug) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); + bins = get_next_full_bin( rast ); + assert(bins); + /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); rasterize_bins(rast, task->thread_index, - rast->bins, rast->fb, rast->write_depth); + bins, rast->fb, rast->write_depth); + + release_current_bin( rast ); /* signal done with work */ if (debug) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 4e4f8b36a7a..f174aa15052 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -95,10 +95,18 @@ struct lp_rasterizer boolean clipped_tile; boolean check_for_clipped_tiles; + /** The incoming queue of filled bins to rasterize */ struct lp_bins_queue *full_bins; + /** The outgoing queue of emptied bins to return to setup modulee */ struct lp_bins_queue *empty_bins; + pipe_mutex get_bin_mutex; + /** The bins currently being rasterized by the threads */ + struct lp_bins *curr_bins; + /** 
Counter to determine when all threads are done with current bin */ + unsigned release_count; + /* Framebuffer stuff */ struct pipe_screen *screen; -- cgit v1.2.3 From 4e67f10331bfd87560e2900e66f3b942902bc65c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:16:48 -0700 Subject: llvmpipe: minor comment fix --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 0cd95e0ca72..2c9c13ab22f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -591,7 +591,7 @@ rasterize_bins( struct lp_rasterizer *rast, /** - * Called by rasterizer when it has something for us to render. + * Called by setup module when it has something for us to render. */ void lp_rasterize_bins( struct lp_rasterizer *rast, -- cgit v1.2.3 From 205da96fc64a197b7d1a15010456402030d8893b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:25:22 -0700 Subject: llvmpipe: remove unused fb parameter --- src/gallium/drivers/llvmpipe/lp_rast.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2c9c13ab22f..7083029e458 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -561,7 +561,6 @@ static void rasterize_bins( struct lp_rasterizer *rast, unsigned thread_index, struct lp_bins *bins, - const struct pipe_framebuffer_state *fb, bool write_depth ) { /* loop over tile bins, rasterize each */ @@ -626,7 +625,7 @@ lp_rasterize_bins( struct lp_rasterizer *rast, if (rast->num_threads == 0) { /* no threading */ lp_bin_iter_begin( bins ); - rasterize_bins( rast, 0, bins, fb, write_depth ); + rasterize_bins( rast, 0, bins, write_depth ); /* reset bins and put into the empty queue */ lp_reset_bins( bins ); @@ -690,7 +689,7 
@@ thread_func( void *init_data ) if (debug) debug_printf("thread %d doing work\n", task->thread_index); rasterize_bins(rast, task->thread_index, - bins, rast->fb, rast->write_depth); + bins, rast->write_depth); release_current_bin( rast ); -- cgit v1.2.3 From 8f2a1736635368951c3f30e484ee6137066964d6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:39:53 -0700 Subject: gallium/util: simplify util_framebuffer_state_equal() And copy width, height in util_copy_framebuffer_state(). --- src/gallium/auxiliary/util/u_surface.c | 16 +++++++++++----- src/gallium/auxiliary/util/u_surface.h | 10 +++++----- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index a95b887e847..daaa275ef25 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -122,24 +122,27 @@ boolean util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, const struct pipe_framebuffer_state *src) { - boolean changed = FALSE; unsigned i; + if (dst->width != src->width || + dst->height != src->height) + return FALSE; + for (i = 0; i < Elements(src->cbufs); i++) { if (dst->cbufs[i] != src->cbufs[i]) { - changed = TRUE; + return FALSE; } } if (dst->nr_cbufs != src->nr_cbufs) { - changed = TRUE; + return FALSE; } if (dst->zsbuf != src->zsbuf) { - changed = TRUE; + return FALSE; } - return changed; + return TRUE; } @@ -152,6 +155,9 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, { unsigned i; + dst->width = src->width; + dst->height = src->height; + for (i = 0; i < Elements(src->cbufs); i++) { pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); } diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h index a9da9aadcbb..3c60df2c3e5 100644 --- a/src/gallium/auxiliary/util/u_surface.h +++ b/src/gallium/auxiliary/util/u_surface.h @@ -30,11 +30,7 @@ #include 
"pipe/p_compiler.h" - - -struct pipe_screen; -struct pipe_texture; -struct pipe_surface; +#include "pipe/p_state.h" /** @@ -75,4 +71,8 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, const struct pipe_framebuffer_state *src); +extern void +util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb); + + #endif /* U_SURFACE_H */ -- cgit v1.2.3 From d1fa748cdba0b1145066186b3d634b79b5d69473 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:40:52 -0700 Subject: gallium/util: added util_unreference_framebuffer_state() --- src/gallium/auxiliary/util/u_surface.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src') diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index daaa275ef25..cfdf7ab8f8a 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -166,3 +166,19 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, pipe_surface_reference(&dst->zsbuf, src->zsbuf); } + + +void +util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb) +{ + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + + pipe_surface_reference(&fb->zsbuf, NULL); + + fb->width = fb->height = 0; + fb->nr_cbufs = 0; +} -- cgit v1.2.3 From 544882eb58253a4538ccc90ae091abed353416b4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:41:11 -0700 Subject: llvmpipe: fix inverted util_framebuffer_state_equal() result --- src/gallium/drivers/llvmpipe/lp_state_surface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 0263f2a624a..21565436eb6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -47,7 +47,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, 
{ struct llvmpipe_context *lp = llvmpipe_context(pipe); - boolean changed = util_framebuffer_state_equal(&lp->framebuffer, fb); + boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb); if (changed) { -- cgit v1.2.3 From 9d0faea58cee28cf16bd31e6adbb2d93c391c556 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:42:18 -0700 Subject: llvmpipe: fix-up #includes --- src/gallium/drivers/llvmpipe/lp_bin_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c index 0fda0b5baeb..b4bc439089e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.c +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.c @@ -34,7 +34,7 @@ #include "pipe/p_thread.h" -#include "lp_bin.h" +#include "util/u_memory.h" #include "lp_bin_queue.h" -- cgit v1.2.3 From 156eabbaf996f471458ee2a69078674277b89067 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:46:23 -0700 Subject: llvmpipe: improve framebuffer/surface code --- src/gallium/drivers/llvmpipe/lp_rast.c | 70 ++++++++++++++--------------- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 5 +-- 2 files changed, 34 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 7083029e458..97233e17008 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -28,6 +28,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_cpu_detect.h" +#include "util/u_surface.h" #include "lp_bin_queue.h" #include "lp_debug.h" @@ -87,28 +88,25 @@ release_current_bin( struct lp_rasterizer *rast ) */ static boolean lp_rast_begin( struct lp_rasterizer *rast, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - boolean write_color, - boolean write_zstencil, - unsigned width, - unsigned height ) + const struct pipe_framebuffer_state *fb, + boolean 
write_color, + boolean write_zstencil ) { struct pipe_screen *screen = rast->screen; + struct pipe_surface *cbuf, *zsbuf; - LP_DBG(DEBUG_RAST, "%s %dx%d\n", __FUNCTION__, width, height); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - pipe_surface_reference(&rast->state.cbuf, cbuf); - pipe_surface_reference(&rast->state.zsbuf, zsbuf); + util_copy_framebuffer_state(&rast->state.fb, fb); - rast->width = width; - rast->height = height; rast->state.write_zstencil = write_zstencil; rast->state.write_color = write_color; - rast->check_for_clipped_tiles = (width % TILE_SIZE != 0 || - height % TILE_SIZE != 0); + rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || + fb->height % TILE_SIZE != 0); + /* XXX support multiple color buffers here */ + cbuf = rast->state.fb.cbufs[0]; if (cbuf) { rast->cbuf_transfer = screen->get_tex_transfer(rast->screen, cbuf->texture, @@ -116,7 +114,8 @@ lp_rast_begin( struct lp_rasterizer *rast, cbuf->level, cbuf->zslice, PIPE_TRANSFER_READ_WRITE, - 0, 0, width, height); + 0, 0, + fb->width, fb->height); if (!rast->cbuf_transfer) return FALSE; @@ -126,14 +125,16 @@ lp_rast_begin( struct lp_rasterizer *rast, return FALSE; } + zsbuf = rast->state.fb.zsbuf; if (zsbuf) { rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, - zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, width, height); + zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + fb->width, fb->height); if (!rast->zsbuf_transfer) return FALSE; @@ -442,11 +443,11 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, int w = TILE_SIZE; int h = TILE_SIZE; - if (x + w > rast->width) - w -= x + w - rast->width; + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; - if (y + h > rast->height) - h -= y + h - rast->height; + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; assert(w >= 0); assert(h >= 0); @@ -491,11 +492,11 @@ 
static void lp_rast_store_zstencil( struct lp_rasterizer *rast, unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; - if (x + w > rast->width) - w -= x + w - rast->width; + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; - if (y + h > rast->height) - h -= y + h - rast->height; + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); @@ -614,13 +615,9 @@ lp_rasterize_bins( struct lp_rasterizer *rast, } } - lp_rast_begin( rast, - fb->cbufs[0], - fb->zsbuf, - fb->cbufs[0] != NULL, - fb->zsbuf != NULL && write_depth, - fb->width, - fb->height ); + lp_rast_begin( rast, fb, + fb->cbufs[0]!= NULL, + fb->zsbuf != NULL && write_depth ); if (rast->num_threads == 0) { /* no threading */ @@ -765,8 +762,7 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) { unsigned i; - pipe_surface_reference(&rast->state.cbuf, NULL); - pipe_surface_reference(&rast->state.zsbuf, NULL); + util_unreference_framebuffer_state(&rast->state.fb); for (i = 0; i < Elements(rast->tasks); i++) { align_free(rast->tasks[i].tile.depth); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index f174aa15052..abe791fd006 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -90,8 +90,6 @@ struct lp_rasterizer_task */ struct lp_rasterizer { - unsigned width, height; /**< Size of framebuffer, in pixels */ - boolean clipped_tile; boolean check_for_clipped_tiles; @@ -116,8 +114,7 @@ struct lp_rasterizer void *zsbuf_map; struct { - struct pipe_surface *cbuf; - struct pipe_surface *zsbuf; + struct pipe_framebuffer_state fb; boolean write_color; boolean write_zstencil; unsigned clear_color; -- cgit v1.2.3 From de31b0e60c4b68b73b8983a0ae3e8f3f61d9d583 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 11:56:22 -0700 Subject: llvmpipe: remove unused lp_rasterizer::fb field --- 
src/gallium/drivers/llvmpipe/lp_rast.c | 1 - src/gallium/drivers/llvmpipe/lp_rast_priv.h | 1 - 2 files changed, 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 97233e17008..a8212d74e36 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -635,7 +635,6 @@ lp_rasterize_bins( struct lp_rasterizer *rast, lp_bins_enqueue( rast->full_bins, bins ); /* XXX need to move/fix these */ - rast->fb = fb; rast->write_depth = write_depth; /*lp_bin_iter_begin( bins );*/ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index abe791fd006..4ae54ac8c1b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -129,7 +129,6 @@ struct lp_rasterizer pipe_thread threads[MAX_THREADS]; struct lp_bins *bins; - const struct pipe_framebuffer_state *fb; boolean write_depth; }; -- cgit v1.2.3 From 0fc90dfa280e12a100c6c7c632d5d29c16118c9a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 14:00:25 -0700 Subject: gallium: added pipe_barrier type and functions --- src/gallium/include/pipe/p_thread.h | 61 ++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index 45c35a87d0e..ba5cd589f84 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -27,7 +27,8 @@ /** * @file * - * Thread, mutex, condition var and thread-specific data functions. + * Thread, mutex, condition variable, barrier, semaphore and + * thread-specific data functions. 
*/ @@ -106,6 +107,24 @@ typedef pthread_cond_t pipe_condvar; pthread_cond_broadcast(&(cond)) +typedef pthread_barrier_t pipe_barrier; + +static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) +{ + pthread_barrier_init(barrier, NULL, count); +} + +static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) +{ + pthread_barrier_destroy(barrier); +} + +static INLINE void pipe_barrier_wait(pipe_barrier *barrier) +{ + pthread_barrier_wait(barrier); +} + + #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) #include @@ -162,6 +181,27 @@ typedef unsigned pipe_condvar; #define pipe_condvar_broadcast(condvar) \ (void) condvar + +typedef unsigned pipe_barrier; + +static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) +{ + /* XXX we could implement barriers with a mutex and condition var */ + assert(0); +} + +static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) +{ + assert(0); +} + +static INLINE void pipe_barrier_wait(pipe_barrier *barrier) +{ + assert(0); +} + + + #else /** Dummy definitions */ @@ -169,6 +209,7 @@ typedef unsigned pipe_condvar; typedef unsigned pipe_thread; typedef unsigned pipe_mutex; typedef unsigned pipe_condvar; +typedef unsigned pipe_barrier; #define pipe_static_mutex(mutex) \ static pipe_mutex mutex = 0 @@ -204,6 +245,24 @@ typedef unsigned pipe_condvar; (void) condvar +static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) +{ + /* XXX we could implement barriers with a mutex and condition var */ + assert(0); +} + +static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) +{ + assert(0); +} + +static INLINE void pipe_barrier_wait(pipe_barrier *barrier) +{ + assert(0); +} + + + #endif /* PIPE_OS_? */ -- cgit v1.2.3 From 24d894e5579bd11fdf294d86834093e353abf4db Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 14:07:01 -0700 Subject: gallium: comments and minor re-org in p_thread.h There's more work to do in this file: 1. 
Implement condvars for Windows via Win32 CONDITION_VARIABLE type. 2. Implement barriers for Windows 3. Try to get rid of PIPE_THREAD_HAVE_CONDVAR (only used in trace driver) 4. Why the 2 in _P_THREAD2_H_? --- src/gallium/include/pipe/p_thread.h | 40 +++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index ba5cd589f84..8119c1f571c 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -47,6 +47,8 @@ #define PIPE_THREAD_HAVE_CONDVAR +/* pipe_thread + */ typedef pthread_t pipe_thread; #define PIPE_THREAD_ROUTINE( name, param ) \ @@ -70,8 +72,10 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) return pthread_detach( thread ); } + +/* pipe_mutex + */ typedef pthread_mutex_t pipe_mutex; -typedef pthread_cond_t pipe_condvar; #define pipe_static_mutex(mutex) \ static pipe_mutex mutex = PTHREAD_MUTEX_INITIALIZER @@ -88,6 +92,11 @@ typedef pthread_cond_t pipe_condvar; #define pipe_mutex_unlock(mutex) \ (void) pthread_mutex_unlock(&(mutex)) + +/* pipe_condvar + */ +typedef pthread_cond_t pipe_condvar; + #define pipe_static_condvar(mutex) \ static pipe_condvar mutex = PTHREAD_COND_INITIALIZER @@ -107,6 +116,8 @@ typedef pthread_cond_t pipe_condvar; pthread_cond_broadcast(&(cond)) +/* pipe_barrier + */ typedef pthread_barrier_t pipe_barrier; static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) @@ -129,6 +140,8 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier) #include +/* pipe_thread + */ typedef HANDLE pipe_thread; #define PIPE_THREAD_ROUTINE( name, param ) \ @@ -154,6 +167,9 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) return -1; } + +/* pipe_mutex + */ typedef CRITICAL_SECTION pipe_mutex; #define pipe_static_mutex(mutex) \ @@ -171,17 +187,29 @@ typedef CRITICAL_SECTION pipe_mutex; #define pipe_mutex_unlock(mutex) \ 
LeaveCriticalSection(&mutex) -/* XXX: dummy definitions, make it compile */ +/* pipe_condvar (XXX FIX THIS) + */ typedef unsigned pipe_condvar; -#define pipe_condvar_init(condvar) \ - (void) condvar +#define pipe_condvar_init(cond) \ + (void) cond -#define pipe_condvar_broadcast(condvar) \ - (void) condvar +#define pipe_condvar_destroy(cond) \ + (void) cond + +#define pipe_condvar_wait(cond, mutex) \ + (void) cond; (void) mutex + +#define pipe_condvar_signal(cond) \ + (void) cond + +#define pipe_condvar_broadcast(cond) \ + (void) cond +/* pipe_barrier (XXX FIX THIS) + */ typedef unsigned pipe_barrier; static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) -- cgit v1.2.3 From 2bce5c195f94e2cce8f67c6a8066b0ae408487ce Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 14:52:42 -0700 Subject: llvmpipe: checkpoint: more thread/queuing changes Now mapping/unmapping the framebuffer is done by a rasterizer thread rather than the main calling thread. --- src/gallium/drivers/llvmpipe/lp_bin.h | 5 ++ src/gallium/drivers/llvmpipe/lp_rast.c | 130 +++++++++++++++------------- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 24 +++-- 3 files changed, 84 insertions(+), 75 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index 4394e7bda05..565dd49f684 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -108,6 +108,11 @@ struct lp_bins { struct cmd_bin tile[TILES_X][TILES_Y]; struct data_block_list data; + /** the framebuffer to render the bins into */ + struct pipe_framebuffer_state fb; + + boolean write_depth; + /** * Number of active tiles in each dimension.
* This basically the framebuffer size divided by tile size diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a8212d74e36..2ea3ac6b3b1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -40,48 +40,6 @@ #include "lp_bin.h" - -/** - * Called by rasterization threads to get the next chunk of work. - * We use a lock to make sure that all the threads get the same bins. - */ -static struct lp_bins * -get_next_full_bin( struct lp_rasterizer *rast ) -{ - pipe_mutex_lock( rast->get_bin_mutex ); - if (!rast->curr_bins) { - /* this will wait until there's something in the queue */ - rast->curr_bins = lp_bins_dequeue( rast->full_bins ); - rast->release_count = 0; - - lp_bin_iter_begin( rast->curr_bins ); - } - pipe_mutex_unlock( rast->get_bin_mutex ); - return rast->curr_bins; -} - - -/** - * Called by rasterization threads after they've finished with - * the current bin. When all threads have called this, we reset - * the bin and put it into the 'empty bins' queue. - */ -static void -release_current_bin( struct lp_rasterizer *rast ) -{ - pipe_mutex_lock( rast->get_bin_mutex ); - rast->release_count++; - if (rast->release_count == rast->num_threads) { - assert(rast->curr_bins); - lp_reset_bins( rast->curr_bins ); - lp_bins_enqueue( rast->empty_bins, rast->curr_bins ); - rast->curr_bins = NULL; - } - pipe_mutex_unlock( rast->get_bin_mutex ); -} - - - /** * Begin the rasterization phase. * Map the framebuffer surfaces. Initialize the 'rast' state. @@ -525,6 +483,22 @@ lp_rast_end_tile( struct lp_rasterizer *rast, } +/** + * When all the threads are done rasterizing a bin, one thread will + * call this function to reset the bin and put it onto the empty queue. 
+ */ +static void +release_bins( struct lp_rasterizer *rast, + struct lp_bins *bins ) +{ + util_unreference_framebuffer_state( &bins->fb ); + + lp_reset_bins( bins ); + lp_bins_enqueue( rast->empty_bins, bins ); + rast->curr_bins = NULL; +} + + /** * Rasterize commands for a single bin. * \param x, y position of the bin's tile in the framebuffer @@ -615,18 +589,23 @@ lp_rasterize_bins( struct lp_rasterizer *rast, } } - lp_rast_begin( rast, fb, - fb->cbufs[0]!= NULL, - fb->zsbuf != NULL && write_depth ); + /* save framebuffer state in the bin */ + util_copy_framebuffer_state(&bins->fb, fb); + bins->write_depth = write_depth; if (rast->num_threads == 0) { /* no threading */ + + lp_rast_begin( rast, fb, + fb->cbufs[0]!= NULL, + fb->zsbuf != NULL && write_depth ); + lp_bin_iter_begin( bins ); rasterize_bins( rast, 0, bins, write_depth ); - /* reset bins and put into the empty queue */ - lp_reset_bins( bins ); - lp_bins_enqueue( rast->empty_bins, bins); + release_bins( rast, bins ); + + lp_rast_end( rast ); } else { /* threaded rendering! 
*/ @@ -634,11 +613,6 @@ lp_rasterize_bins( struct lp_rasterizer *rast, lp_bins_enqueue( rast->full_bins, bins ); - /* XXX need to move/fix these */ - rast->write_depth = write_depth; - - /*lp_bin_iter_begin( bins );*/ - /* signal the threads that there's work to do */ for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_signal(&rast->tasks[i].work_ready); @@ -650,8 +624,6 @@ lp_rasterize_bins( struct lp_rasterizer *rast, } } - lp_rast_end( rast ); - LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } @@ -671,23 +643,53 @@ thread_func( void *init_data ) boolean debug = false; while (1) { - struct lp_bins *bins; - /* wait for work */ if (debug) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); - bins = get_next_full_bin( rast ); - assert(bins); + if (task->thread_index == 0) { + /* thread[0]: + * - get next set of bins to rasterize + * - map the framebuffer surfaces + */ + const struct pipe_framebuffer_state *fb; + boolean write_depth; + + rast->curr_bins = lp_bins_dequeue( rast->full_bins ); + + lp_bin_iter_begin( rast->curr_bins ); + + fb = &rast->curr_bins->fb; + write_depth = rast->curr_bins->write_depth; + + lp_rast_begin( rast, fb, + fb->cbufs[0] != NULL, + fb->zsbuf != NULL && write_depth ); + } + + /* Wait for all threads to get here so that threads[1+] don't + * get a null rast->curr_bins pointer. 
+ */ + pipe_barrier_wait( &rast->barrier ); /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); rasterize_bins(rast, task->thread_index, - bins, rast->write_depth); + rast->curr_bins, rast->curr_bins->write_depth); - release_current_bin( rast ); + /* wait for all threads to finish with this set of bins */ + pipe_barrier_wait( &rast->barrier ); + + if (task->thread_index == 0) { + /* thread[0]: + * - release the bins object + * - unmap the framebuffer surfaces + */ + release_bins( rast, rast->curr_bins ); + lp_rast_end( rast ); + } /* signal done with work */ if (debug) @@ -751,6 +753,9 @@ lp_rast_create( struct pipe_screen *screen, struct lp_bins_queue *empty ) create_rast_threads(rast); + /* for synchronizing rasterization threads */ + pipe_barrier_init( &rast->barrier, rast->num_threads ); + return rast; } @@ -768,6 +773,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) align_free(rast->tasks[i].tile.color); } + /* for synchronizing rasterization threads */ + pipe_barrier_destroy( &rast->barrier ); + FREE(rast); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 4ae54ac8c1b..ba14fc36751 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -93,18 +93,6 @@ struct lp_rasterizer boolean clipped_tile; boolean check_for_clipped_tiles; - /** The incoming queue of filled bins to rasterize */ - struct lp_bins_queue *full_bins; - /** The outgoing queue of emptied bins to return to setup modulee */ - struct lp_bins_queue *empty_bins; - - pipe_mutex get_bin_mutex; - - /** The bins currently being rasterized by the threads */ - struct lp_bins *curr_bins; - /** Counter to determine when all threads are done with current bin */ - unsigned release_count; - /* Framebuffer stuff */ struct pipe_screen *screen; @@ -122,14 +110,22 @@ struct lp_rasterizer char clear_stencil; } state; + /** The incoming queue of filled bins to 
rasterize */ + struct lp_bins_queue *full_bins; + /** The outgoing queue of emptied bins to return to setup modulee */ + struct lp_bins_queue *empty_bins; + + /** The bins currently being rasterized by the threads */ + struct lp_bins *curr_bins; + /** A task object for each rasterization thread */ struct lp_rasterizer_task tasks[MAX_THREADS]; unsigned num_threads; pipe_thread threads[MAX_THREADS]; - struct lp_bins *bins; - boolean write_depth; + /** For synchronizing the rasterization threads */ + pipe_barrier barrier; }; -- cgit v1.2.3 From 92dc0f92b0f0fa2f3e4ba832ef2232169ce19ce8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 15:00:28 -0700 Subject: llvmpipe: implement lp_rast_load_color() --- src/gallium/drivers/llvmpipe/lp_rast.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2ea3ac6b3b1..9020cf9cec2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -210,9 +210,31 @@ void lp_rast_load_color( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg) { - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; + int w = TILE_SIZE; + int h = TILE_SIZE; + + LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); + + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; + + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; - /* call u_tile func to load colors from surface */ + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); + + lp_tile_read_4ub(rast->cbuf_transfer->format, + rast->tasks[thread_index].tile.color, + rast->cbuf_map, + rast->cbuf_transfer->stride, + x, y, + w, h); } -- cgit v1.2.3 From edbaca6fd14dace44637d994bbddad3cb0a5fafe 
Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:32:08 -0700 Subject: llvmpipe: initial fence implementation --- src/gallium/drivers/llvmpipe/lp_fence.c | 109 ++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_fence.h | 60 ++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 src/gallium/drivers/llvmpipe/lp_fence.c create mode 100644 src/gallium/drivers/llvmpipe/lp_fence.h (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c new file mode 100644 index 00000000000..14fbea6d993 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -0,0 +1,109 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "lp_fence.h" + + +struct lp_fence * +lp_fence_create(unsigned rank) +{ + struct lp_fence *fence = CALLOC_STRUCT(lp_fence); + + pipe_reference_init(&fence->reference, 1); + + pipe_mutex_init(fence->mutex); + pipe_condvar_init(fence->signalled); + + fence->rank = rank; + + return fence; +} + + +static void +lp_fence_destroy(struct lp_fence *fence) +{ + pipe_mutex_destroy(fence->mutex); + pipe_condvar_destroy(fence->signalled); + FREE(fence); +} + + +static void +llvmpipe_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct lp_fence *old = (struct lp_fence *) *ptr; + struct lp_fence *f = (struct lp_fence *) fence; + + if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) { + lp_fence_destroy(old); + } +} + + +static int +llvmpipe_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct lp_fence *f = (struct lp_fence *) fence; + + return f->count == f->rank; +} + + +static int +llvmpipe_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + unsigned flag) +{ + struct lp_fence *fence = (struct lp_fence *) fence_handle; + + pipe_mutex_lock(fence->mutex); + while (fence->count < fence->rank) { + pipe_condvar_wait(fence->signalled, fence->mutex); + } + pipe_mutex_unlock(fence->mutex); + + return 0; +} + + + + +void +llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen) +{ + screen->fence_reference = llvmpipe_fence_reference; + screen->fence_signalled = llvmpipe_fence_signalled; + screen->fence_finish = llvmpipe_fence_finish; +} diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h new file mode 100644 index 00000000000..d45318f9e47 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_fence.h @@ -0,0 +1,60 @@ 
+/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_FENCE_H +#define LP_FENCE_H + + +#include "pipe/p_refcnt.h" +#include "pipe/p_thread.h" + + +struct pipe_screen; + + +struct lp_fence +{ + struct pipe_reference reference; + + pipe_mutex mutex; + pipe_condvar signalled; + + unsigned rank; + unsigned count; +}; + + +struct lp_fence * +lp_fence_create(unsigned rank); + + +void +llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen); + + +#endif /* LP_FENCE_H */ -- cgit v1.2.3 From 2876b684de39dbdf803b3f3d1ac231e76fb4357e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:32:45 -0700 Subject: llvmpipe: remove old fence code, compile new lp_fence.c file --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_buffer.c | 31 ------------------------------- src/gallium/drivers/llvmpipe/lp_screen.c | 2 ++ 4 files changed, 4 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 4cc4c88ffd2..7c4cf320b97 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -32,6 +32,7 @@ C_SOURCES = \ lp_clear.c \ lp_context.c \ lp_draw_arrays.c \ + lp_fence.c \ lp_flush.c \ lp_jit.c \ lp_prim_vbuf.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 19ef6861672..bc725b65f62 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -45,6 +45,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_clear.c', 'lp_context.c', 'lp_draw_arrays.c', + 'lp_fence.c', 'lp_flush.c', 'lp_jit.c', 'lp_prim_vbuf.c', diff --git a/src/gallium/drivers/llvmpipe/lp_buffer.c b/src/gallium/drivers/llvmpipe/lp_buffer.c index 66f1f8e1383..a5ef221a216 100644 --- a/src/gallium/drivers/llvmpipe/lp_buffer.c +++ b/src/gallium/drivers/llvmpipe/lp_buffer.c @@ -108,32 
+108,6 @@ llvmpipe_user_buffer_create(struct pipe_screen *screen, } -static void -llvmpipe_fence_reference(struct pipe_screen *screen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ -} - - -static int -llvmpipe_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static int -llvmpipe_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - void llvmpipe_init_screen_buffer_funcs(struct pipe_screen *screen) { @@ -142,9 +116,4 @@ llvmpipe_init_screen_buffer_funcs(struct pipe_screen *screen) screen->buffer_map = llvmpipe_buffer_map; screen->buffer_unmap = llvmpipe_buffer_unmap; screen->buffer_destroy = llvmpipe_buffer_destroy; - - screen->fence_reference = llvmpipe_fence_reference; - screen->fence_signalled = llvmpipe_fence_signalled; - screen->fence_finish = llvmpipe_fence_finish; - } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 87fddbd13f1..017496ea5fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -32,6 +32,7 @@ #include "lp_texture.h" #include "lp_buffer.h" +#include "lp_fence.h" #include "lp_winsys.h" #include "lp_jit.h" #include "lp_screen.h" @@ -253,6 +254,7 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys) llvmpipe_init_screen_texture_funcs(&screen->base); llvmpipe_init_screen_buffer_funcs(&screen->base); + llvmpipe_init_screen_fence_funcs(&screen->base); lp_jit_screen_init(screen); -- cgit v1.2.3 From 8736ee1e7d45c2a3868d46b2ecba7471518cd9b6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:33:30 -0700 Subject: llvmpipe: added lp_bin_get_num_bins() --- src/gallium/drivers/llvmpipe/lp_bin.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index 565dd49f684..e763b16ffe6 
100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -258,6 +258,13 @@ lp_bin_state_command( struct lp_bins *bins, const union lp_rast_cmd_arg arg ); +static INLINE unsigned +lp_bin_get_num_bins( const struct lp_bins *bins ) +{ + return bins->tiles_x * bins->tiles_y; +} + + void lp_bin_iter_begin( struct lp_bins *bins ); -- cgit v1.2.3 From 932374073863379e9da862d6115410889f038154 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:45:52 -0700 Subject: llvmpipe: added lp_rast_get_num_threads() --- src/gallium/drivers/llvmpipe/lp_rast.c | 7 +++++++ src/gallium/drivers/llvmpipe/lp_rast.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 9020cf9cec2..6b7aa8d729d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -801,3 +801,10 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) FREE(rast); } + +/** Return number of rasterization threads */ +unsigned +lp_rast_get_num_threads( struct lp_rasterizer *rast ) +{ + return rast->num_threads; +} diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 75f7b744045..785be49b700 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -133,6 +133,8 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, void lp_rast_destroy( struct lp_rasterizer * ); +unsigned lp_rast_get_num_threads( struct lp_rasterizer * ); + void lp_rasterize_bins( struct lp_rasterizer *rast, struct lp_bins *bins, const struct pipe_framebuffer_state *fb, -- cgit v1.2.3 From 4b70af918dd9040a6987c6a55e76e49f0e3f90bf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:57:45 -0700 Subject: llvmpipe: added lp_rast_fence() bin function --- src/gallium/drivers/llvmpipe/lp_rast.c | 25 +++++++++++++++++++++++++ 
src/gallium/drivers/llvmpipe/lp_rast.h | 14 ++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6b7aa8d729d..3e7b3d7ab40 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -32,6 +32,7 @@ #include "lp_bin_queue.h" #include "lp_debug.h" +#include "lp_fence.h" #include "lp_state.h" #include "lp_rast.h" #include "lp_rast_priv.h" @@ -505,6 +506,30 @@ lp_rast_end_tile( struct lp_rasterizer *rast, } +/** + * Signal on a fence. This is called during bin execution/rasterization. + * Called per thread. + */ +void lp_rast_fence( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + struct lp_fence *fence = arg.fence; + + pipe_mutex_lock( fence->mutex ); + + fence->count++; + assert(fence->count <= fence->rank); + + LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__, + fence->count, fence->rank); + + pipe_condvar_signal( fence->signalled ); + + pipe_mutex_unlock( fence->mutex ); +} + + /** * When all the threads are done rasterizing a bin, one thread will * call this function to reset the bin and put it onto the empty queue. 
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 785be49b700..bd8f1ae1c9b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -44,6 +44,7 @@ struct lp_rasterizer; struct lp_bins; struct lp_bins_queue; +struct lp_fence; struct cmd_bin; struct pipe_screen; @@ -148,6 +149,7 @@ union lp_rast_cmd_arg { const struct lp_rast_state *set_state; uint8_t clear_color[4]; unsigned clear_zstencil; + struct lp_fence *fence; }; @@ -177,6 +179,15 @@ lp_rast_arg_state( const struct lp_rast_state *state ) return arg; } +static INLINE const union lp_rast_cmd_arg +lp_rast_arg_fence( struct lp_fence *fence ) +{ + union lp_rast_cmd_arg arg; + arg.fence = fence; + return arg; +} + + static INLINE const union lp_rast_cmd_arg lp_rast_arg_null( void ) { @@ -221,5 +232,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *, unsigned thread_index, const union lp_rast_cmd_arg ); +void lp_rast_fence( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); #endif -- cgit v1.2.3 From 6cbb1219a3f6b83ee4d24aecb61f5b5943e3cac3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Dec 2009 17:59:26 -0700 Subject: llvmpipe: checkpoint: plug in the new fencing code This has only been very lightly tested. More work to come. 
--- src/gallium/drivers/llvmpipe/lp_flush.c | 20 +++++++++++++++++--- src/gallium/drivers/llvmpipe/lp_setup.c | 23 +++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup.h | 4 ++++ 3 files changed, 44 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index f7a1d897019..e6519cb216b 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -49,6 +49,23 @@ llvmpipe_flush( struct pipe_context *pipe, draw_flush(llvmpipe->draw); + if (fence) { + if ((flags & (PIPE_FLUSH_SWAPBUFFERS | + PIPE_FLUSH_RENDER_CACHE))) { + /* if we're going to flush the setup/rasterization modules, emit + * a fence. + * XXX this (and the code below) may need fine tuning... + */ + *fence = lp_setup_fence( llvmpipe->setup ); + } + else { + *fence = NULL; + } + } + + /* XXX the lp_setup_flush(flags) param is not a bool, and it's ignored + * at this time! + */ if (flags & PIPE_FLUSH_SWAPBUFFERS) { lp_setup_flush( llvmpipe->setup, FALSE ); } @@ -68,8 +85,5 @@ llvmpipe_flush( struct pipe_context *pipe, ++frame_no; } #endif - - if (fence) - *fence = NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d976934a5dd..3967b4f21ed 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -39,6 +39,7 @@ #include "lp_bin.h" #include "lp_bin_queue.h" #include "lp_debug.h" +#include "lp_fence.h" #include "lp_state.h" #include "lp_buffer.h" #include "lp_texture.h" @@ -308,6 +309,28 @@ lp_setup_clear( struct setup_context *setup, } +/** + * Emit a fence. 
+ */ +struct pipe_fence_handle * +lp_setup_fence( struct setup_context *setup ) +{ + struct lp_bins *bins = lp_setup_get_current_bins(setup); + const unsigned rank = lp_bin_get_num_bins( bins ); + struct lp_fence *fence = lp_fence_create(rank); + + LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); + + set_state( setup, SETUP_ACTIVE ); + + /* insert the fence into all command bins */ + lp_bin_everywhere( bins, + lp_rast_fence, + lp_rast_arg_fence(fence) ); + + return (struct pipe_fence_handle *) fence; +} + void lp_setup_set_triangle_state( struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 66a7f29f1e7..5c606e86afc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -67,6 +67,10 @@ lp_setup_clear(struct setup_context *setup, unsigned clear_stencil, unsigned flags); +struct pipe_fence_handle * +lp_setup_fence( struct setup_context *setup ); + + void lp_setup_tri(struct setup_context *setup, const float (*v0)[4], -- cgit v1.2.3 From 314d3cd751448f9ae36126937b3bbf0330542da3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 12 Dec 2009 20:19:46 +0000 Subject: llvmpipe: rename one of the two rasterize_bins functions --- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3967b4f21ed..6d20975cb81 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -128,8 +128,8 @@ static void reset_context( struct setup_context *setup ) /** Rasterize all tile's bins */ static void -rasterize_bins( struct setup_context *setup, - boolean write_depth ) +lp_setup_rasterize_bins( struct setup_context *setup, + boolean write_depth ) { struct lp_bins *bins = lp_setup_get_current_bins(setup); @@ -189,7 +189,7 @@ execute_clears( struct 
setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); - rasterize_bins( setup, TRUE ); + lp_setup_rasterize_bins( setup, TRUE ); } @@ -220,7 +220,7 @@ set_state( struct setup_context *setup, if (old_state == SETUP_CLEARED) execute_clears( setup ); else - rasterize_bins( setup, TRUE ); + lp_setup_rasterize_bins( setup, TRUE ); break; } -- cgit v1.2.3 From 39dd7108bf6014a8430dffc290e98c7b47432cd3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 12 Dec 2009 20:29:39 +0000 Subject: llvmpipe: rename queue size to count --- src/gallium/drivers/llvmpipe/lp_bin_queue.c | 44 ++++++++++++++--------------- src/gallium/drivers/llvmpipe/lp_bin_queue.h | 4 +-- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- 3 files changed, 25 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c index b4bc439089e..b39b46b72b5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.c +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.c @@ -51,9 +51,9 @@ struct lp_bins_queue * probably always be pretty short. 
*/ struct lp_bins *bins[MAX_BINS]; - unsigned size; + unsigned count; - pipe_condvar size_change; + pipe_condvar count_change; pipe_mutex mutex; }; @@ -65,7 +65,7 @@ lp_bins_queue_create(void) { struct lp_bins_queue *queue = CALLOC_STRUCT(lp_bins_queue); if (queue) { - pipe_condvar_init(queue->size_change); + pipe_condvar_init(queue->count_change); pipe_mutex_init(queue->mutex); } return queue; @@ -76,7 +76,7 @@ lp_bins_queue_create(void) void lp_bins_queue_destroy(struct lp_bins_queue *queue) { - pipe_condvar_destroy(queue->size_change); + pipe_condvar_destroy(queue->count_change); pipe_mutex_destroy(queue->mutex); } @@ -89,24 +89,24 @@ lp_bins_dequeue(struct lp_bins_queue *queue) unsigned i; pipe_mutex_lock(queue->mutex); - while (queue->size == 0) { - pipe_condvar_wait(queue->size_change, queue->mutex); + while (queue->count == 0) { + pipe_condvar_wait(queue->count_change, queue->mutex); } - assert(queue->size >= 1); + assert(queue->count >= 1); /* get head */ bins = queue->bins[0]; /* shift entries */ - for (i = 0; i < queue->size - 1; i++) { + for (i = 0; i < queue->count - 1; i++) { queue->bins[i] = queue->bins[i + 1]; } - queue->size--; + queue->count--; /* signal size change */ - pipe_condvar_signal(queue->size_change); + pipe_condvar_signal(queue->count_change); pipe_mutex_unlock(queue->mutex); @@ -120,21 +120,21 @@ lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins) { pipe_mutex_lock(queue->mutex); - assert(queue->size < MAX_BINS); + assert(queue->count < MAX_BINS); /* debug: check that bins is not already in the queue */ if (0) { unsigned i; - for (i = 0; i < queue->size; i++) { + for (i = 0; i < queue->count; i++) { assert(queue->bins[i] != bins); } } /* add to end */ - queue->bins[queue->size++] = bins; + queue->bins[queue->count++] = bins; /* signal size change */ - pipe_condvar_signal(queue->size_change); + pipe_condvar_signal(queue->count_change); pipe_mutex_unlock(queue->mutex); } @@ -142,23 +142,23 @@ lp_bins_enqueue(struct 
lp_bins_queue *queue, struct lp_bins *bins) /** Return number of entries in the queue */ unsigned -lp_bins_queue_size(struct lp_bins_queue *queue) +lp_bins_queue_count(struct lp_bins_queue *queue) { - unsigned sz; + unsigned count; pipe_mutex_lock(queue->mutex); - sz = queue->size; + count = queue->count; pipe_mutex_unlock(queue->mutex); - return sz; + return count; } -/** Wait until the queue as 'size' entries */ +/** Wait until the queue has exactly 'count' entries */ void -lp_bins_queue_wait_size(struct lp_bins_queue *queue, unsigned size) +lp_bins_queue_wait_count(struct lp_bins_queue *queue, unsigned count) { pipe_mutex_lock(queue->mutex); - while (queue->size != size) { - pipe_condvar_wait(queue->size_change, queue->mutex); + while (queue->count != count) { + pipe_condvar_wait(queue->count_change, queue->mutex); } pipe_mutex_unlock(queue->mutex); } diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.h b/src/gallium/drivers/llvmpipe/lp_bin_queue.h index 8946a541585..1a0f8832dba 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.h +++ b/src/gallium/drivers/llvmpipe/lp_bin_queue.h @@ -46,10 +46,10 @@ void lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins); unsigned -lp_bins_queue_size(struct lp_bins_queue *queue); +lp_bins_queue_count(struct lp_bins_queue *queue); void -lp_bins_queue_wait_size(struct lp_bins_queue *queue, unsigned size); +lp_bins_queue_wait_count(struct lp_bins_queue *queue, unsigned size); #endif /* LP_BIN_QUEUE */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6d20975cb81..0972c167841 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -588,7 +588,7 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); /* free the bins in the 'empty' queue */ - while (lp_bins_queue_size(setup->empty_bins) > 0) { + while (lp_bins_queue_count(setup->empty_bins) > 0) { struct lp_bins 
*bins = lp_bins_dequeue(setup->empty_bins); if (!bins) break; -- cgit v1.2.3 From 663750d5564a225b4720f7ee8bea93ffb309fc88 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 13 Dec 2009 18:17:25 +0000 Subject: llvmpipe: rename bins to scene It was pretty confusing having an entity named "bin" and another named "bins", not least because sometimes there was a need to talk about >1 of the "bins" objects, which couldn't be pluralized any further... Scene is a term used in a bunch of places to talk about what a binner operates on, so it's a decent choice here. --- src/gallium/drivers/llvmpipe/SConscript | 10 +- src/gallium/drivers/llvmpipe/lp_bin.c | 310 ------------------------ src/gallium/drivers/llvmpipe/lp_bin.h | 275 --------------------- src/gallium/drivers/llvmpipe/lp_bin_queue.c | 164 ------------- src/gallium/drivers/llvmpipe/lp_bin_queue.h | 55 ----- src/gallium/drivers/llvmpipe/lp_rast.c | 96 ++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 14 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 12 +- src/gallium/drivers/llvmpipe/lp_scene.c | 310 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_scene.h | 276 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_scene_queue.c | 164 +++++++++++++ src/gallium/drivers/llvmpipe/lp_scene_queue.h | 55 +++++ src/gallium/drivers/llvmpipe/lp_setup.c | 112 ++++----- src/gallium/drivers/llvmpipe/lp_setup_context.h | 12 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 36 +-- 15 files changed, 952 insertions(+), 949 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_bin.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bin.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_bin_queue.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_bin_queue.h create mode 100644 src/gallium/drivers/llvmpipe/lp_scene.c create mode 100644 src/gallium/drivers/llvmpipe/lp_scene.h create mode 100644 src/gallium/drivers/llvmpipe/lp_scene_queue.c create mode 100644 src/gallium/drivers/llvmpipe/lp_scene_queue.h 
(limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index bc725b65f62..f0b71ef3eee 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -19,8 +19,6 @@ env.CodeGenerate( llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ - 'lp_bin.c', - 'lp_bin_queue.c', 'lp_bld_alpha.c', 'lp_bld_arit.c', 'lp_bld_blend_aos.c', @@ -35,9 +33,9 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_logic.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', - 'lp_bld_logic.c', 'lp_bld_swizzle.c', 'lp_bld_tgsi_soa.c', 'lp_bld_type.c', @@ -50,11 +48,13 @@ llvmpipe = env.ConvenienceLibrary( 'lp_jit.c', 'lp_prim_vbuf.c', 'lp_query.c', + 'lp_scene.c', + 'lp_scene_queue.c', + 'lp_screen.c', 'lp_setup.c', - 'lp_setup_tri.c', 'lp_setup_line.c', 'lp_setup_point.c', - 'lp_screen.c', + 'lp_setup_tri.c', 'lp_state_blend.c', 'lp_state_clip.c', 'lp_state_derived.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bin.c b/src/gallium/drivers/llvmpipe/lp_bin.c deleted file mode 100644 index 703cdd2de59..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bin.c +++ /dev/null @@ -1,310 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "lp_bin.h" - - -struct lp_bins * -lp_bins_create(void) -{ - struct lp_bins *bins = CALLOC_STRUCT(lp_bins); - if (bins) - lp_init_bins(bins); - return bins; -} - - -void -lp_bins_destroy(struct lp_bins *bins) -{ - lp_reset_bins(bins); - lp_free_bin_data(bins); - FREE(bins); -} - - -void -lp_init_bins(struct lp_bins *bins) -{ - unsigned i, j; - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); - } - - bins->data.head = - bins->data.tail = CALLOC_STRUCT(data_block); - - pipe_mutex_init(bins->mutex); -} - - -/** - * Set bins to empty state. 
- */ -void -lp_reset_bins(struct lp_bins *bins ) -{ - unsigned i, j; - - /* Free all but last binner command lists: - */ - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - struct cmd_block_list *list = &bin->commands; - struct cmd_block *block; - struct cmd_block *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->count = 0; - } - } - - /* Free all but last binned data block: - */ - { - struct data_block_list *list = &bins->data; - struct data_block *block, *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->used = 0; - } -} - - -/** - * Free all data associated with the given bin, but don't free(bins). - */ -void -lp_free_bin_data(struct lp_bins *bins) -{ - unsigned i, j; - - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - /* lp_reset_bins() should have been already called */ - assert(bin->commands.head == bin->commands.tail); - FREE(bin->commands.head); - bin->commands.head = NULL; - bin->commands.tail = NULL; - } - - FREE(bins->data.head); - bins->data.head = NULL; - - pipe_mutex_destroy(bins->mutex); -} - - -void -lp_bin_set_framebuffer_size( struct lp_bins *bins, - unsigned width, unsigned height ) -{ - bins->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; - bins->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; -} - - -void -lp_bin_new_cmd_block( struct cmd_block_list *list ) -{ - struct cmd_block *block = MALLOC_STRUCT(cmd_block); - list->tail->next = block; - list->tail = block; - block->next = NULL; - block->count = 0; -} - - -void -lp_bin_new_data_block( struct data_block_list *list ) -{ - struct data_block *block = MALLOC_STRUCT(data_block); - 
list->tail->next = block; - list->tail = block; - block->next = NULL; - block->used = 0; -} - - -/** Return number of bytes used for bin data */ -unsigned -lp_bin_data_size( const struct lp_bins *bins ) -{ - unsigned size = 0; - const struct data_block *block; - for (block = bins->data.head; block; block = block->next) { - size += block->used; - } - return size; -} - - -/** Return number of bytes used for a tile bin */ -unsigned -lp_bin_cmd_size( const struct lp_bins *bins, unsigned x, unsigned y ) -{ - struct cmd_bin *bin = lp_get_bin((struct lp_bins *) bins, x, y); - const struct cmd_block *cmd; - unsigned size = 0; - for (cmd = bin->commands.head; cmd; cmd = cmd->next) { - size += (cmd->count * - (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg))); - } - return size; -} - - -/** - * Return last command in the bin - */ -static lp_rast_cmd -lp_get_last_command( const struct cmd_bin *bin ) -{ - const struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - if (i > 0) - return tail->cmd[i - 1]; - else - return NULL; -} - - -/** - * Replace the arg of the last command in the bin. - */ -static void -lp_replace_last_command_arg( struct cmd_bin *bin, - const union lp_rast_cmd_arg arg ) -{ - struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - assert(i > 0); - tail->arg[i - 1] = arg; -} - - - -/** - * Put a state-change command into all bins. - * If we find that the last command in a bin was also a state-change - * command, we can simply replace that one with the new one. 
- */ -void -lp_bin_state_command( struct lp_bins *bins, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); - lp_rast_cmd last_cmd = lp_get_last_command(bin); - if (last_cmd == cmd) { - lp_replace_last_command_arg(bin, arg); - } - else { - lp_bin_command( bins, i, j, cmd, arg ); - } - } - } -} - - -/** advance curr_x,y to the next bin */ -static boolean -next_bin(struct lp_bins *bins) -{ - bins->curr_x++; - if (bins->curr_x >= bins->tiles_x) { - bins->curr_x = 0; - bins->curr_y++; - } - if (bins->curr_y >= bins->tiles_y) { - /* no more bins */ - return FALSE; - } - return TRUE; -} - - -void -lp_bin_iter_begin( struct lp_bins *bins ) -{ - bins->curr_x = bins->curr_y = -1; -} - - -/** - * Return point to next bin to be rendered. - * The lp_bins::curr_x and ::curr_y fields will be advanced. - * Multiple rendering threads will call this function to get a chunk - * of work (a bin) to work on. - */ -struct cmd_bin * -lp_bin_iter_next( struct lp_bins *bins, int *bin_x, int *bin_y ) -{ - struct cmd_bin *bin = NULL; - - pipe_mutex_lock(bins->mutex); - - if (bins->curr_x < 0) { - /* first bin */ - bins->curr_x = 0; - bins->curr_y = 0; - } - else if (!next_bin(bins)) { - /* no more bins left */ - goto end; - } - - bin = lp_get_bin(bins, bins->curr_x, bins->curr_y); - *bin_x = bins->curr_x; - *bin_y = bins->curr_y; - -end: - /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/ - pipe_mutex_unlock(bins->mutex); - return bin; -} diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h deleted file mode 100644 index e763b16ffe6..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ /dev/null @@ -1,275 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Binner data structures and bin-related functions. - * Note: the "setup" code is concerned with building bins while - * The "rast" code is concerned with consuming/executing bins. - */ - -#ifndef LP_BIN_H -#define LP_BIN_H - -#include "pipe/p_thread.h" -#include "lp_tile_soa.h" -#include "lp_rast.h" - - -/* We're limited to 2K by 2K for 32bit fixed point rasterization. - * Will need a 64-bit version for larger framebuffers. 
- */ -#define MAXHEIGHT 2048 -#define MAXWIDTH 2048 -#define TILES_X (MAXWIDTH / TILE_SIZE) -#define TILES_Y (MAXHEIGHT / TILE_SIZE) - - -#define CMD_BLOCK_MAX 128 -#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) - - - -/* switch to a non-pointer value for this: - */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, - unsigned thread_index, - const union lp_rast_cmd_arg ); - -struct cmd_block { - lp_rast_cmd cmd[CMD_BLOCK_MAX]; - union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; - unsigned count; - struct cmd_block *next; -}; - -struct data_block { - ubyte data[DATA_BLOCK_SIZE]; - unsigned used; - struct data_block *next; -}; - -struct cmd_block_list { - struct cmd_block *head; - struct cmd_block *tail; -}; - -/** - * For each screen tile we have one of these bins. - */ -struct cmd_bin { - struct cmd_block_list commands; -}; - - -/** - * This stores bulk data which is shared by all bins. - * Examples include triangle data and state data. The commands in - * the per-tile bins will point to chunks of data in this structure. - */ -struct data_block_list { - struct data_block *head; - struct data_block *tail; -}; - - -/** - * All bins and bin data are contained here. - * Per-bin data goes into the 'tile' bins. - * Shared bin data goes into the 'data' buffer. - * When there are multiple threads, will want to double-buffer the - * bin arrays: - */ -struct lp_bins { - struct cmd_bin tile[TILES_X][TILES_Y]; - struct data_block_list data; - - /** the framebuffer to render the bins into */ - struct pipe_framebuffer_state fb; - - boolean write_depth; - - /** - * Number of active tiles in each dimension. 
- * This basically the framebuffer size divided by tile size - */ - unsigned tiles_x, tiles_y; - - int curr_x, curr_y; /**< for iterating over bins */ - pipe_mutex mutex; -}; - - - -struct lp_bins *lp_bins_create(void); - -void lp_bins_destroy(struct lp_bins *bins); - - -void lp_init_bins(struct lp_bins *bins); - -void lp_reset_bins(struct lp_bins *bins ); - -void lp_free_bin_data(struct lp_bins *bins); - -void lp_bin_set_framebuffer_size( struct lp_bins *bins, - unsigned width, unsigned height ); - -void lp_bin_new_data_block( struct data_block_list *list ); - -void lp_bin_new_cmd_block( struct cmd_block_list *list ); - -unsigned lp_bin_data_size( const struct lp_bins *bins ); - -unsigned lp_bin_cmd_size( const struct lp_bins *bins, unsigned x, unsigned y ); - - -/** - * Allocate space for a command/data in the bin's data buffer. - * Grow the block list if needed. - */ -static INLINE void * -lp_bin_alloc( struct lp_bins *bins, unsigned size) -{ - struct data_block_list *list = &bins->data; - - if (list->tail->used + size > DATA_BLOCK_SIZE) { - lp_bin_new_data_block( list ); - } - - { - struct data_block *tail = list->tail; - ubyte *data = tail->data + tail->used; - tail->used += size; - return data; - } -} - - -/** - * As above, but with specific alignment. - */ -static INLINE void * -lp_bin_alloc_aligned( struct lp_bins *bins, unsigned size, - unsigned alignment ) -{ - struct data_block_list *list = &bins->data; - - if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { - lp_bin_new_data_block( list ); - } - - { - struct data_block *tail = list->tail; - ubyte *data = tail->data + tail->used; - unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; - tail->used += offset + size; - return data + offset; - } -} - - -/* Put back data if we decide not to use it, eg. culled triangles. 
- */ -static INLINE void -lp_bin_putback_data( struct lp_bins *bins, unsigned size) -{ - struct data_block_list *list = &bins->data; - assert(list->tail->used >= size); - list->tail->used -= size; -} - - -/** Return pointer to a particular tile's bin. */ -static INLINE struct cmd_bin * -lp_get_bin(struct lp_bins *bins, unsigned x, unsigned y) -{ - return &bins->tile[x][y]; -} - - - -/* Add a command to bin[x][y]. - */ -static INLINE void -lp_bin_command( struct lp_bins *bins, - unsigned x, unsigned y, - lp_rast_cmd cmd, - union lp_rast_cmd_arg arg ) -{ - struct cmd_bin *bin = lp_get_bin(bins, x, y); - struct cmd_block_list *list = &bin->commands; - - if (list->tail->count == CMD_BLOCK_MAX) { - lp_bin_new_cmd_block( list ); - } - - { - struct cmd_block *tail = list->tail; - unsigned i = tail->count; - tail->cmd[i] = cmd; - tail->arg[i] = arg; - tail->count++; - } -} - - -/* Add a command to all active bins. - */ -static INLINE void -lp_bin_everywhere( struct lp_bins *bins, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < bins->tiles_x; i++) - for (j = 0; j < bins->tiles_y; j++) - lp_bin_command( bins, i, j, cmd, arg ); -} - - -void -lp_bin_state_command( struct lp_bins *bins, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ); - - -static INLINE unsigned -lp_bin_get_num_bins( const struct lp_bins *bins ) -{ - return bins->tiles_x * bins->tiles_y; -} - - -void -lp_bin_iter_begin( struct lp_bins *bins ); - -struct cmd_bin * -lp_bin_iter_next( struct lp_bins *bins, int *bin_x, int *bin_y ); - - -#endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.c b/src/gallium/drivers/llvmpipe/lp_bin_queue.c deleted file mode 100644 index b39b46b72b5..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.c +++ /dev/null @@ -1,164 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Bin queue. We'll use two queues. One contains "full" bins which - * are produced by the "setup" code. The other contains "empty" bins - * which are produced by the "rast" code when it finishes rendering a bin. - */ - - -#include "pipe/p_thread.h" -#include "util/u_memory.h" -#include "lp_bin_queue.h" - - - -#define MAX_BINS 4 - - -/** - * A queue of bins - */ -struct lp_bins_queue -{ - /** XXX might use a linked list here somedone, but the list will - * probably always be pretty short. 
- */ - struct lp_bins *bins[MAX_BINS]; - unsigned count; - - pipe_condvar count_change; - pipe_mutex mutex; -}; - - - -/** Allocate a new bins queue */ -struct lp_bins_queue * -lp_bins_queue_create(void) -{ - struct lp_bins_queue *queue = CALLOC_STRUCT(lp_bins_queue); - if (queue) { - pipe_condvar_init(queue->count_change); - pipe_mutex_init(queue->mutex); - } - return queue; -} - - -/** Delete a new bins queue */ -void -lp_bins_queue_destroy(struct lp_bins_queue *queue) -{ - pipe_condvar_destroy(queue->count_change); - pipe_mutex_destroy(queue->mutex); -} - - -/** Remove first lp_bins from head of queue */ -struct lp_bins * -lp_bins_dequeue(struct lp_bins_queue *queue) -{ - struct lp_bins *bins; - unsigned i; - - pipe_mutex_lock(queue->mutex); - while (queue->count == 0) { - pipe_condvar_wait(queue->count_change, queue->mutex); - } - - assert(queue->count >= 1); - - /* get head */ - bins = queue->bins[0]; - - /* shift entries */ - for (i = 0; i < queue->count - 1; i++) { - queue->bins[i] = queue->bins[i + 1]; - } - - queue->count--; - - /* signal size change */ - pipe_condvar_signal(queue->count_change); - - pipe_mutex_unlock(queue->mutex); - - return bins; -} - - -/** Add an lp_bins to tail of queue */ -void -lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins) -{ - pipe_mutex_lock(queue->mutex); - - assert(queue->count < MAX_BINS); - - /* debug: check that bins is not already in the queue */ - if (0) { - unsigned i; - for (i = 0; i < queue->count; i++) { - assert(queue->bins[i] != bins); - } - } - - /* add to end */ - queue->bins[queue->count++] = bins; - - /* signal size change */ - pipe_condvar_signal(queue->count_change); - - pipe_mutex_unlock(queue->mutex); -} - - -/** Return number of entries in the queue */ -unsigned -lp_bins_queue_count(struct lp_bins_queue *queue) -{ - unsigned count; - pipe_mutex_lock(queue->mutex); - count = queue->count; - pipe_mutex_unlock(queue->mutex); - return count; -} - - -/** Wait until the queue has exactly 
'count' entries */ -void -lp_bins_queue_wait_count(struct lp_bins_queue *queue, unsigned count) -{ - pipe_mutex_lock(queue->mutex); - while (queue->count != count) { - pipe_condvar_wait(queue->count_change, queue->mutex); - } - pipe_mutex_unlock(queue->mutex); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bin_queue.h b/src/gallium/drivers/llvmpipe/lp_bin_queue.h deleted file mode 100644 index 1a0f8832dba..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bin_queue.h +++ /dev/null @@ -1,55 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef LP_BIN_QUEUE -#define LP_BIN_QUEUE - -struct lp_bin_queue; -struct lp_bins; - - -struct lp_bins_queue * -lp_bins_queue_create(void); - -void -lp_bins_queue_destroy(struct lp_bins_queue *queue); - -struct lp_bins * -lp_bins_dequeue(struct lp_bins_queue *queue); - -void -lp_bins_enqueue(struct lp_bins_queue *queue, struct lp_bins *bins); - -unsigned -lp_bins_queue_count(struct lp_bins_queue *queue); - -void -lp_bins_queue_wait_count(struct lp_bins_queue *queue, unsigned size); - - -#endif /* LP_BIN_QUEUE */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3e7b3d7ab40..fd9cd67d859 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -30,7 +30,7 @@ #include "util/u_cpu_detect.h" #include "util/u_surface.h" -#include "lp_bin_queue.h" +#include "lp_scene_queue.h" #include "lp_debug.h" #include "lp_fence.h" #include "lp_state.h" @@ -38,7 +38,7 @@ #include "lp_rast_priv.h" #include "lp_tile_soa.h" #include "lp_bld_debug.h" -#include "lp_bin.h" +#include "lp_scene.h" /** @@ -531,18 +531,18 @@ void lp_rast_fence( struct lp_rasterizer *rast, /** - * When all the threads are done rasterizing a bin, one thread will - * call this function to reset the bin and put it onto the empty queue. + * When all the threads are done rasterizing a scene, one thread will + * call this function to reset the scene and put it onto the empty queue. 
*/ static void -release_bins( struct lp_rasterizer *rast, - struct lp_bins *bins ) +release_scene( struct lp_rasterizer *rast, + struct lp_scene *scene ) { - util_unreference_framebuffer_state( &bins->fb ); + util_unreference_framebuffer_state( &scene->fb ); - lp_reset_bins( bins ); - lp_bins_enqueue( rast->empty_bins, bins ); - rast->curr_bins = NULL; + lp_scene_reset( scene ); + lp_scene_enqueue( rast->empty_scenes, scene ); + rast->curr_scene = NULL; } @@ -576,22 +576,22 @@ rasterize_bin( struct lp_rasterizer *rast, /** - * Rasterize/execute all bins. + * Rasterize/execute all bins within a scene. * Called per thread. */ static void -rasterize_bins( struct lp_rasterizer *rast, +rasterize_scene( struct lp_rasterizer *rast, unsigned thread_index, - struct lp_bins *bins, + struct lp_scene *scene, bool write_depth ) { - /* loop over tile bins, rasterize each */ + /* loop over scene bins, rasterize each */ #if 0 { unsigned i, j; - for (i = 0; i < bins->tiles_x; i++) { - for (j = 0; j < bins->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(bins, i, j); + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(scene, i, j); rasterize_bin( rast, thread_index, bin, i * TILE_SIZE, j * TILE_SIZE ); } @@ -602,8 +602,8 @@ rasterize_bins( struct lp_rasterizer *rast, struct cmd_bin *bin; int x, y; - assert(bins); - while ((bin = lp_bin_iter_next(bins, &x, &y))) { + assert(scene); + while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); } } @@ -615,8 +615,8 @@ rasterize_bins( struct lp_rasterizer *rast, * Called by setup module when it has something for us to render. 
*/ void -lp_rasterize_bins( struct lp_rasterizer *rast, - struct lp_bins *bins, +lp_rasterize_scene( struct lp_rasterizer *rast, + struct lp_scene *scene, const struct pipe_framebuffer_state *fb, bool write_depth ) { @@ -626,19 +626,19 @@ lp_rasterize_bins( struct lp_rasterizer *rast, if (debug) { unsigned x, y; - printf("rasterize bins:\n"); - printf(" data size: %u\n", lp_bin_data_size(bins)); - for (y = 0; y < bins->tiles_y; y++) { - for (x = 0; x < bins->tiles_x; x++) { + printf("rasterize scene:\n"); + printf(" data size: %u\n", lp_scene_data_size(scene)); + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { printf(" bin %u, %u size: %u\n", x, y, - lp_bin_cmd_size(bins, x, y)); + lp_scene_bin_size(scene, x, y)); } } } /* save framebuffer state in the bin */ - util_copy_framebuffer_state(&bins->fb, fb); - bins->write_depth = write_depth; + util_copy_framebuffer_state(&scene->fb, fb); + scene->write_depth = write_depth; if (rast->num_threads == 0) { /* no threading */ @@ -647,10 +647,10 @@ lp_rasterize_bins( struct lp_rasterizer *rast, fb->cbufs[0]!= NULL, fb->zsbuf != NULL && write_depth ); - lp_bin_iter_begin( bins ); - rasterize_bins( rast, 0, bins, write_depth ); + lp_scene_bin_iter_begin( scene ); + rasterize_scene( rast, 0, scene, write_depth ); - release_bins( rast, bins ); + release_scene( rast, scene ); lp_rast_end( rast ); } @@ -658,7 +658,7 @@ lp_rasterize_bins( struct lp_rasterizer *rast, /* threaded rendering! 
*/ unsigned i; - lp_bins_enqueue( rast->full_bins, bins ); + lp_scene_enqueue( rast->full_scenes, scene ); /* signal the threads that there's work to do */ for (i = 0; i < rast->num_threads; i++) { @@ -697,18 +697,18 @@ thread_func( void *init_data ) if (task->thread_index == 0) { /* thread[0]: - * - get next set of bins to rasterize + * - get next scene to rasterize * - map the framebuffer surfaces */ const struct pipe_framebuffer_state *fb; boolean write_depth; - rast->curr_bins = lp_bins_dequeue( rast->full_bins ); + rast->curr_scene = lp_scene_dequeue( rast->full_scenes ); - lp_bin_iter_begin( rast->curr_bins ); + lp_scene_bin_iter_begin( rast->curr_scene ); - fb = &rast->curr_bins->fb; - write_depth = rast->curr_bins->write_depth; + fb = &rast->curr_scene->fb; + write_depth = rast->curr_scene->write_depth; lp_rast_begin( rast, fb, fb->cbufs[0] != NULL, @@ -716,25 +716,27 @@ thread_func( void *init_data ) } /* Wait for all threads to get here so that threads[1+] don't - * get a null rast->curr_bins pointer. + * get a null rast->curr_scene pointer. */ pipe_barrier_wait( &rast->barrier ); /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); - rasterize_bins(rast, task->thread_index, - rast->curr_bins, rast->curr_bins->write_depth); + rasterize_scene(rast, + task->thread_index, + rast->curr_scene, + rast->curr_scene->write_depth); - /* wait for all threads to finish with this set of bins */ + /* wait for all threads to finish with this scene */ pipe_barrier_wait( &rast->barrier ); if (task->thread_index == 0) { /* thread[0]: - * - release the bins object + * - release the scene object * - unmap the framebuffer surfaces */ - release_bins( rast, rast->curr_bins ); + release_scene( rast, rast->curr_scene ); lp_rast_end( rast ); } @@ -773,11 +775,11 @@ create_rast_threads(struct lp_rasterizer *rast) /** * Create new lp_rasterizer. 
- * \param empty the queue to put empty bins on after we've finished + * \param empty the queue to put empty scenes on after we've finished * processing them. */ struct lp_rasterizer * -lp_rast_create( struct pipe_screen *screen, struct lp_bins_queue *empty ) +lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) { struct lp_rasterizer *rast; unsigned i; @@ -788,8 +790,8 @@ lp_rast_create( struct pipe_screen *screen, struct lp_bins_queue *empty ) rast->screen = screen; - rast->empty_bins = empty; - rast->full_bins = lp_bins_queue_create(); + rast->empty_scenes = empty; + rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index bd8f1ae1c9b..2dd0193d8dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -42,8 +42,8 @@ struct lp_rasterizer; -struct lp_bins; -struct lp_bins_queue; +struct lp_scene; +struct lp_scene_queue; struct lp_fence; struct cmd_bin; struct pipe_screen; @@ -130,16 +130,16 @@ struct lp_rast_triangle { struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, - struct lp_bins_queue *empty ); + struct lp_scene_queue *empty ); void lp_rast_destroy( struct lp_rasterizer * ); unsigned lp_rast_get_num_threads( struct lp_rasterizer * ); -void lp_rasterize_bins( struct lp_rasterizer *rast, - struct lp_bins *bins, - const struct pipe_framebuffer_state *fb, - bool write_depth ); +void lp_rasterize_scene( struct lp_rasterizer *rast, + struct lp_scene *scene, + const struct pipe_framebuffer_state *fb, + bool write_depth ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index ba14fc36751..79a90f6610c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -110,13 +110,13 @@ 
struct lp_rasterizer char clear_stencil; } state; - /** The incoming queue of filled bins to rasterize */ - struct lp_bins_queue *full_bins; - /** The outgoing queue of emptied bins to return to setup modulee */ - struct lp_bins_queue *empty_bins; + /** The incoming queue of scenes ready to rasterize */ + struct lp_scene_queue *full_scenes; + /** The outgoing queue of processed scenes to return to setup module */ + struct lp_scene_queue *empty_scenes; - /** The bins currently being rasterized by the threads */ - struct lp_bins *curr_bins; + /** The scene currently being rasterized by the threads */ + struct lp_scene *curr_scene; /** A task object for each rasterization thread */ struct lp_rasterizer_task tasks[MAX_THREADS]; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c new file mode 100644 index 00000000000..774a1fecd71 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -0,0 +1,310 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_scene.h" + + +struct lp_scene * +lp_scene_create(void) +{ + struct lp_scene *scene = CALLOC_STRUCT(lp_scene); + if (scene) + lp_scene_init(scene); + return scene; +} + + +void +lp_scene_destroy(struct lp_scene *scene) +{ + lp_scene_reset(scene); + lp_scene_free_bin_data(scene); + FREE(scene); +} + + +void +lp_scene_init(struct lp_scene *scene) +{ + unsigned i, j; + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + } + + scene->data.head = + scene->data.tail = CALLOC_STRUCT(data_block); + + pipe_mutex_init(scene->mutex); +} + + +/** + * Set scene to empty state. 
+ */ +void +lp_scene_reset(struct lp_scene *scene ) +{ + unsigned i, j; + + /* Free all but last binner command lists: + */ + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + struct cmd_block_list *list = &bin->commands; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->count = 0; + } + } + + /* Free all but last binned data block: + */ + { + struct data_block_list *list = &scene->data; + struct data_block *block, *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->used = 0; + } +} + + +/** + * Free all data associated with the given scene, but don't free(scene). + */ +void +lp_scene_free_bin_data(struct lp_scene *scene) +{ + unsigned i, j; + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + /* lp_scene_reset() should have been already called */ + assert(bin->commands.head == bin->commands.tail); + FREE(bin->commands.head); + bin->commands.head = NULL; + bin->commands.tail = NULL; + } + + FREE(scene->data.head); + scene->data.head = NULL; + + pipe_mutex_destroy(scene->mutex); +} + + +void +lp_scene_set_framebuffer_size( struct lp_scene *scene, + unsigned width, unsigned height ) +{ + scene->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; + scene->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; +} + + +void +lp_bin_new_cmd_block( struct cmd_block_list *list ) +{ + struct cmd_block *block = MALLOC_STRUCT(cmd_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->count = 0; +} + + +void +lp_bin_new_data_block( struct data_block_list *list ) +{ + struct
data_block *block = MALLOC_STRUCT(data_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->used = 0; +} + + +/** Return number of bytes used for all bin data within a scene */ +unsigned +lp_scene_data_size( const struct lp_scene *scene ) +{ + unsigned size = 0; + const struct data_block *block; + for (block = scene->data.head; block; block = block->next) { + size += block->used; + } + return size; +} + + +/** Return number of bytes used for a single bin */ +unsigned +lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) +{ + struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y); + const struct cmd_block *cmd; + unsigned size = 0; + for (cmd = bin->commands.head; cmd; cmd = cmd->next) { + size += (cmd->count * + (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg))); + } + return size; +} + + +/** + * Return last command in the bin + */ +static lp_rast_cmd +lp_get_last_command( const struct cmd_bin *bin ) +{ + const struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + if (i > 0) + return tail->cmd[i - 1]; + else + return NULL; +} + + +/** + * Replace the arg of the last command in the bin. + */ +static void +lp_replace_last_command_arg( struct cmd_bin *bin, + const union lp_rast_cmd_arg arg ) +{ + struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + assert(i > 0); + tail->arg[i - 1] = arg; +} + + + +/** + * Put a state-change command into all bins. + * If we find that the last command in a bin was also a state-change + * command, we can simply replace that one with the new one. 
+ */ +void +lp_scene_bin_state_command( struct lp_scene *scene, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + lp_rast_cmd last_cmd = lp_get_last_command(bin); + if (last_cmd == cmd) { + lp_replace_last_command_arg(bin, arg); + } + else { + lp_scene_bin_command( scene, i, j, cmd, arg ); + } + } + } +} + + +/** advance curr_x,y to the next bin */ +static boolean +next_bin(struct lp_scene *scene) +{ + scene->curr_x++; + if (scene->curr_x >= scene->tiles_x) { + scene->curr_x = 0; + scene->curr_y++; + } + if (scene->curr_y >= scene->tiles_y) { + /* no more bins */ + return FALSE; + } + return TRUE; +} + + +void +lp_scene_bin_iter_begin( struct lp_scene *scene ) +{ + scene->curr_x = scene->curr_y = -1; +} + + +/** + * Return pointer to next bin to be rendered. + * The lp_scene::curr_x and ::curr_y fields will be advanced. + * Multiple rendering threads will call this function to get a chunk + * of work (a bin) to work on.
+ */ +struct cmd_bin * +lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ) +{ + struct cmd_bin *bin = NULL; + + pipe_mutex_lock(scene->mutex); + + if (scene->curr_x < 0) { + /* first bin */ + scene->curr_x = 0; + scene->curr_y = 0; + } + else if (!next_bin(scene)) { + /* no more bins left */ + goto end; + } + + bin = lp_scene_get_bin(scene, scene->curr_x, scene->curr_y); + *bin_x = scene->curr_x; + *bin_y = scene->curr_y; + +end: + /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/ + pipe_mutex_unlock(scene->mutex); + return bin; +} diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h new file mode 100644 index 00000000000..796fc516ccc --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -0,0 +1,276 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Binner data structures and bin-related functions. + * Note: the "setup" code is concerned with building scenes while + * The "rast" code is concerned with consuming/executing scenes. + */ + +#ifndef LP_SCENE_H +#define LP_SCENE_H + +#include "pipe/p_thread.h" +#include "lp_tile_soa.h" +#include "lp_rast.h" + + +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. + */ +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 +#define TILES_X (MAXWIDTH / TILE_SIZE) +#define TILES_Y (MAXHEIGHT / TILE_SIZE) + + +#define CMD_BLOCK_MAX 128 +#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) + + + +/* switch to a non-pointer value for this: + */ +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +struct cmd_block { + lp_rast_cmd cmd[CMD_BLOCK_MAX]; + union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; + unsigned count; + struct cmd_block *next; +}; + +struct data_block { + ubyte data[DATA_BLOCK_SIZE]; + unsigned used; + struct data_block *next; +}; + +struct cmd_block_list { + struct cmd_block *head; + struct cmd_block *tail; +}; + +/** + * For each screen tile we have one of these bins. + */ +struct cmd_bin { + struct cmd_block_list commands; +}; + + +/** + * This stores bulk data which is shared by all bins within a scene. + * Examples include triangle data and state data. The commands in + * the per-tile bins will point to chunks of data in this structure. 
+ */ +struct data_block_list { + struct data_block *head; + struct data_block *tail; +}; + + +/** + * All bins and bin data are contained here. + * Per-bin data goes into the 'tile' bins. + * Shared data goes into the 'data' buffer. + * + * When there are multiple threads, will want to double-buffer between + * scenes: + */ +struct lp_scene { + struct cmd_bin tile[TILES_X][TILES_Y]; + struct data_block_list data; + + /** the framebuffer to render the scene into */ + struct pipe_framebuffer_state fb; + + boolean write_depth; + + /** + * Number of active tiles in each dimension. + * This is basically the framebuffer size divided by tile size + */ + unsigned tiles_x, tiles_y; + + int curr_x, curr_y; /**< for iterating over bins */ + pipe_mutex mutex; +}; + + + +struct lp_scene *lp_scene_create(void); + +void lp_scene_destroy(struct lp_scene *scene); + + +void lp_scene_init(struct lp_scene *scene); + +void lp_scene_reset(struct lp_scene *scene ); + +void lp_scene_free_bin_data(struct lp_scene *scene); + +void lp_scene_set_framebuffer_size( struct lp_scene *scene, + unsigned width, unsigned height ); + +void lp_bin_new_data_block( struct data_block_list *list ); + +void lp_bin_new_cmd_block( struct cmd_block_list *list ); + +unsigned lp_scene_data_size( const struct lp_scene *scene ); + +unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ); + + +/** + * Allocate space for a command/data in the bin's data buffer. + * Grow the block list if needed. + */ +static INLINE void * +lp_scene_alloc( struct lp_scene *scene, unsigned size) +{ + struct data_block_list *list = &scene->data; + + if (list->tail->used + size > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + tail->used += size; + return data; + } +} + + +/** + * As above, but with specific alignment.
+ */ +static INLINE void * +lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, + unsigned alignment ) +{ + struct data_block_list *list = &scene->data; + + if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; + tail->used += offset + size; + return data + offset; + } +} + + +/* Put back data if we decide not to use it, eg. culled triangles. + */ +static INLINE void +lp_scene_putback_data( struct lp_scene *scene, unsigned size) +{ + struct data_block_list *list = &scene->data; + assert(list->tail->used >= size); + list->tail->used -= size; +} + + +/** Return pointer to a particular tile's bin. */ +static INLINE struct cmd_bin * +lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y) +{ + return &scene->tile[x][y]; +} + + + +/* Add a command to bin[x][y]. + */ +static INLINE void +lp_scene_bin_command( struct lp_scene *scene, + unsigned x, unsigned y, + lp_rast_cmd cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + struct cmd_block_list *list = &bin->commands; + + if (list->tail->count == CMD_BLOCK_MAX) { + lp_bin_new_cmd_block( list ); + } + + { + struct cmd_block *tail = list->tail; + unsigned i = tail->count; + tail->cmd[i] = cmd; + tail->arg[i] = arg; + tail->count++; + } +} + + +/* Add a command to all active bins. 
+ */ +static INLINE void +lp_scene_bin_everywhere( struct lp_scene *scene, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < scene->tiles_x; i++) + for (j = 0; j < scene->tiles_y; j++) + lp_scene_bin_command( scene, i, j, cmd, arg ); +} + + +void +lp_scene_bin_state_command( struct lp_scene *scene, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ); + + +static INLINE unsigned +lp_scene_get_num_bins( const struct lp_scene *scene ) +{ + return scene->tiles_x * scene->tiles_y; +} + + +void +lp_scene_bin_iter_begin( struct lp_scene *scene ); + +struct cmd_bin * +lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); + + +#endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.c b/src/gallium/drivers/llvmpipe/lp_scene_queue.c new file mode 100644 index 00000000000..8d65a6a6fa2 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.c @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Scene queue. We'll use two queues. One contains "full" scenes which + * are produced by the "setup" code. The other contains "empty" scenes + * which are produced by the "rast" code when it finishes rendering a scene. + */ + + +#include "pipe/p_thread.h" +#include "util/u_memory.h" +#include "lp_scene_queue.h" + + + +#define MAX_SCENE_QUEUE 4 + + +/** + * A queue of scenes + */ +struct lp_scene_queue +{ + /** XXX might use a linked list here someday, but the list will + * probably always be pretty short. + */ + struct lp_scene *scenes[MAX_SCENE_QUEUE]; + unsigned count; + + pipe_condvar count_change; + pipe_mutex mutex; +}; + + + +/** Allocate a new scene queue */ +struct lp_scene_queue * +lp_scene_queue_create(void) +{ + struct lp_scene_queue *queue = CALLOC_STRUCT(lp_scene_queue); + if (queue) { + pipe_condvar_init(queue->count_change); + pipe_mutex_init(queue->mutex); + } + return queue; +} + + +/** Delete a scene queue */ +void +lp_scene_queue_destroy(struct lp_scene_queue *queue) +{ + pipe_condvar_destroy(queue->count_change); + pipe_mutex_destroy(queue->mutex); +} + + +/** Remove first lp_scene from head of queue */ +struct lp_scene * +lp_scene_dequeue(struct lp_scene_queue *queue) +{ + struct lp_scene *scene; + unsigned i; + + pipe_mutex_lock(queue->mutex); + while (queue->count == 0) { + pipe_condvar_wait(queue->count_change, queue->mutex); + } + + assert(queue->count >= 1); + + /* get head */ + scene = queue->scenes[0]; + + /* shift entries */ + for (i = 0; i < queue->count - 1; i++) { + queue->scenes[i] = queue->scenes[i + 1]; + } + + queue->count--; + + /* signal size change */ + 
pipe_condvar_signal(queue->count_change); + + pipe_mutex_unlock(queue->mutex); + + return scene; +} + + +/** Add an lp_scene to tail of queue */ +void +lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene) +{ + pipe_mutex_lock(queue->mutex); + + assert(queue->count < MAX_SCENE_QUEUE); + + /* debug: check that scene is not already in the queue */ + if (0) { + unsigned i; + for (i = 0; i < queue->count; i++) { + assert(queue->scenes[i] != scene); + } + } + + /* add to end */ + queue->scenes[queue->count++] = scene; + + /* signal size change */ + pipe_condvar_signal(queue->count_change); + + pipe_mutex_unlock(queue->mutex); +} + + +/** Return number of entries in the queue */ +unsigned +lp_scene_queue_count(struct lp_scene_queue *queue) +{ + unsigned count; + pipe_mutex_lock(queue->mutex); + count = queue->count; + pipe_mutex_unlock(queue->mutex); + return count; +} + + +/** Wait until the queue has exactly 'count' entries */ +void +lp_scene_queue_wait_count(struct lp_scene_queue *queue, unsigned count) +{ + pipe_mutex_lock(queue->mutex); + while (queue->count != count) { + pipe_condvar_wait(queue->count_change, queue->mutex); + } + pipe_mutex_unlock(queue->mutex); +} diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/src/gallium/drivers/llvmpipe/lp_scene_queue.h new file mode 100644 index 00000000000..1bd475fa504 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_SCENE_QUEUE +#define LP_SCENE_QUEUE + +struct lp_scene_queue; +struct lp_scene; + + +struct lp_scene_queue * +lp_scene_queue_create(void); + +void +lp_scene_queue_destroy(struct lp_scene_queue *queue); + +struct lp_scene * +lp_scene_dequeue(struct lp_scene_queue *queue); + +void +lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *bins); + +unsigned +lp_scene_queue_count(struct lp_scene_queue *queue); + +void +lp_scene_queue_wait_count(struct lp_scene_queue *queue, unsigned size); + + +#endif /* LP_SCENE_QUEUE */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 0972c167841..76e09552377 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -36,8 +36,8 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "util/u_pack_color.h" -#include "lp_bin.h" -#include "lp_bin_queue.h" +#include "lp_scene.h" +#include "lp_scene_queue.h" #include "lp_debug.h" #include "lp_fence.h" #include "lp_state.h" @@ -47,26 +47,26 @@ /** XXX temporary value, temporary here */ -#define MAX_BINS 2 +#define MAX_SCENES 2 static void set_state( struct setup_context *, unsigned ); -struct lp_bins * -lp_setup_get_current_bins(struct setup_context *setup) +struct lp_scene * +lp_setup_get_current_scene(struct setup_context *setup) { - if (!setup->bins) { + if (!setup->scene) { /* wait for a free/empty bin */ - setup->bins = lp_bins_dequeue(setup->empty_bins); - if(0)lp_reset_bins( setup->bins ); /* XXX temporary? */ + setup->scene = lp_scene_dequeue(setup->empty_scenes); + if(0)lp_scene_reset( setup->scene ); /* XXX temporary? 
*/ if (setup->fb) { - lp_bin_set_framebuffer_size(setup->bins, + lp_scene_set_framebuffer_size(setup->scene, setup->fb->width, setup->fb->height); } } - return setup->bins; + return setup->scene; } @@ -111,7 +111,7 @@ static void reset_context( struct setup_context *setup ) setup->dirty = ~0; /* no current bin */ - setup->bins = NULL; + setup->scene = NULL; /* Reset some state: */ @@ -126,15 +126,15 @@ static void reset_context( struct setup_context *setup ) } -/** Rasterize all tile's bins */ +/** Rasterize all scene's bins */ static void -lp_setup_rasterize_bins( struct setup_context *setup, +lp_setup_rasterize_scene( struct setup_context *setup, boolean write_depth ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); - lp_rasterize_bins(setup->rast, - bins, + lp_rasterize_scene(setup->rast, + scene, setup->fb, write_depth); @@ -148,28 +148,28 @@ lp_setup_rasterize_bins( struct setup_context *setup, static void begin_binning( struct setup_context *setup ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); if (setup->fb->cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_clear_color, setup->clear.color ); else - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_load_color, lp_rast_arg_null() ); } if (setup->fb->zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, setup->clear.zstencil ); else - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_load_zstencil, lp_rast_arg_null() ); } @@ -189,7 +189,7 @@ execute_clears( struct setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); - lp_setup_rasterize_bins( setup, TRUE ); + lp_setup_rasterize_scene( 
setup, TRUE ); } @@ -220,7 +220,7 @@ set_state( struct setup_context *setup, if (old_state == SETUP_CLEARED) execute_clears( setup ); else - lp_setup_rasterize_bins( setup, TRUE ); + lp_setup_rasterize_scene( setup, TRUE ); break; } @@ -242,7 +242,7 @@ void lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -250,7 +250,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, setup->fb = fb; - lp_bin_set_framebuffer_size(bins, setup->fb->width, setup->fb->height); + lp_scene_set_framebuffer_size(scene, setup->fb->width, setup->fb->height); } @@ -261,7 +261,7 @@ lp_setup_clear( struct setup_context *setup, unsigned stencil, unsigned flags ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); unsigned i; LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); @@ -280,19 +280,19 @@ lp_setup_clear( struct setup_context *setup, } if (setup->state == SETUP_ACTIVE) { - /* Add the clear to existing bins. In the unusual case where + /* Add the clear to existing scene. In the unusual case where * both color and depth-stencil are being cleared when there's * already been some rendering, we could discard the currently * binned scene and start again, but I don't see that as being * a common usage. 
*/ if (flags & PIPE_CLEAR_COLOR) - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_clear_color, setup->clear.color ); if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) - lp_bin_everywhere( bins, + lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, setup->clear.zstencil ); } @@ -315,8 +315,8 @@ lp_setup_clear( struct setup_context *setup, struct pipe_fence_handle * lp_setup_fence( struct setup_context *setup ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); - const unsigned rank = lp_bin_get_num_bins( bins ); + struct lp_scene *scene = lp_setup_get_current_scene(setup); + const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ struct lp_fence *fence = lp_fence_create(rank); LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); @@ -324,9 +324,9 @@ lp_setup_fence( struct setup_context *setup ) set_state( setup, SETUP_ACTIVE ); /* insert the fence into all command bins */ - lp_bin_everywhere( bins, - lp_rast_fence, - lp_rast_arg_fence(fence) ); + lp_scene_bin_everywhere( scene, + lp_rast_fence, + lp_rast_arg_fence(fence) ); return (struct pipe_fence_handle *) fence; } @@ -455,7 +455,7 @@ lp_setup_is_texture_referenced( struct setup_context *setup, static INLINE void lp_setup_update_shader_state( struct setup_context *setup ) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -465,7 +465,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) uint8_t *stored; unsigned i, j; - stored = lp_bin_alloc_aligned(bins, 4 * 16, 16); + stored = lp_scene_alloc_aligned(scene, 4 * 16, 16); /* smear each blend color component across 16 ubyte elements */ for (i = 0; i < 4; ++i) { @@ -497,7 +497,7 @@ lp_setup_update_shader_state( struct setup_context *setup ) current_size) != 0) { void *stored; - stored = lp_bin_alloc(bins, current_size); + stored = lp_scene_alloc(scene, current_size); if(stored) { memcpy(stored, 
current_data, @@ -522,12 +522,12 @@ lp_setup_update_shader_state( struct setup_context *setup ) memcmp(setup->fs.stored, &setup->fs.current, sizeof setup->fs.current) != 0) { - /* The fs state that's been stored in the bins is different from + /* The fs state that's been stored in the scene is different from * the new, current state. So allocate a new lp_rast_state object * and append it to the bin's setup data buffer. */ struct lp_rast_state *stored = - (struct lp_rast_state *) lp_bin_alloc(bins, sizeof *stored); + (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored); if(stored) { memcpy(stored, &setup->fs.current, @@ -535,9 +535,9 @@ lp_setup_update_shader_state( struct setup_context *setup ) setup->fs.stored = stored; /* put the state-set command into all bins */ - lp_bin_state_command( bins, - lp_rast_set_state, - lp_rast_arg_state(setup->fs.stored) ); + lp_scene_bin_state_command( scene, + lp_rast_set_state, + lp_rast_arg_state(setup->fs.stored) ); } } } @@ -587,12 +587,12 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); - /* free the bins in the 'empty' queue */ - while (lp_bins_queue_count(setup->empty_bins) > 0) { - struct lp_bins *bins = lp_bins_dequeue(setup->empty_bins); - if (!bins) + /* free the scenes in the 'empty' queue */ + while (lp_scene_queue_count(setup->empty_scenes) > 0) { + struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes); + if (!scene) break; - lp_bins_destroy(bins); + lp_scene_destroy(scene); } lp_rast_destroy( setup->rast ); @@ -614,18 +614,18 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup) return NULL; - setup->empty_bins = lp_bins_queue_create(); - if (!setup->empty_bins) + setup->empty_scenes = lp_scene_queue_create(); + if (!setup->empty_scenes) goto fail; - setup->rast = lp_rast_create( screen, setup->empty_bins ); + setup->rast = lp_rast_create( screen, setup->empty_scenes ); if (!setup->rast) goto fail; - /* create some empty bins */ - 
for (i = 0; i < MAX_BINS; i++) { - struct lp_bins *bins = lp_bins_create(); - lp_bins_enqueue(setup->empty_bins, bins); + /* create some empty scenes */ + for (i = 0; i < MAX_SCENES; i++) { + struct lp_scene *scene = lp_scene_create(); + lp_scene_enqueue(setup->empty_scenes, scene); } setup->triangle = first_triangle; @@ -637,8 +637,8 @@ lp_setup_create( struct pipe_screen *screen ) return setup; fail: - if (setup->empty_bins) - lp_bins_queue_destroy(setup->empty_bins); + if (setup->empty_scenes) + lp_scene_queue_destroy(setup->empty_scenes); FREE(setup); return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 584e37665bc..180d9eca84b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -38,7 +38,7 @@ #include "lp_setup.h" #include "lp_rast.h" #include "lp_tile_soa.h" /* for TILE_SIZE */ -#include "lp_bin.h" +#include "lp_scene.h" #define LP_SETUP_NEW_FS 0x01 @@ -46,7 +46,7 @@ #define LP_SETUP_NEW_BLEND_COLOR 0x04 -struct lp_bins_queue; +struct lp_scene_queue; /** @@ -59,8 +59,8 @@ struct setup_context { struct lp_rasterizer *rast; - struct lp_bins *bins; /**< current bins */ - struct lp_bins_queue *empty_bins; /**< queue of empty bins */ + struct lp_scene *scene; /**< current scene */ + struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ boolean ccw_is_frontface; unsigned cullmode; @@ -83,7 +83,7 @@ struct setup_context { struct lp_shader_input input[PIPE_MAX_ATTRIBS]; unsigned nr_inputs; - const struct lp_rast_state *stored; /**< what's in the bins */ + const struct lp_rast_state *stored; /**< what's in the scene */ struct lp_rast_state current; /**< currently set state */ } fs; @@ -118,6 +118,6 @@ void lp_setup_choose_triangle( struct setup_context *setup ); void lp_setup_choose_line( struct setup_context *setup ); void lp_setup_choose_point( struct setup_context *setup ); -struct lp_bins 
*lp_setup_get_current_bins(struct setup_context *setup); +struct lp_scene *lp_setup_get_current_scene(struct setup_context *setup); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 80617120b1c..aeaf260af27 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -178,7 +178,7 @@ static void setup_tri_coefficients( struct setup_context *setup, const float (*v3)[4], boolean frontface) { - struct lp_bins *bins = lp_setup_get_current_bins(setup); + struct lp_scene *scene = lp_setup_get_current_scene(setup); unsigned slot; /* Allocate space for the a0, dadx and dady arrays @@ -186,9 +186,9 @@ static void setup_tri_coefficients( struct setup_context *setup, { unsigned bytes; bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = lp_bin_alloc_aligned( bins, bytes, 16 ); - tri->inputs.dadx = lp_bin_alloc_aligned( bins, bytes, 16 ); - tri->inputs.dady = lp_bin_alloc_aligned( bins, bytes, 16 ); + tri->inputs.a0 = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri->inputs.dadx = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri->inputs.dady = lp_scene_alloc_aligned( scene, bytes, 16 ); } /* The internal position input is in slot zero: @@ -246,8 +246,8 @@ static inline int subpixel_snap( float a ) /** * Do basic setup for triangle rasterization and determine which - * framebuffer tiles are touched. Put the triangle in the bins for the - * tiles which we overlap. + * framebuffer tiles are touched. Put the triangle in the scene's + * bins for the tiles which we overlap. 
*/ static void do_triangle_ccw(struct setup_context *setup, @@ -264,8 +264,8 @@ do_triangle_ccw(struct setup_context *setup, const int y2 = subpixel_snap(v2[0][1]); const int y3 = subpixel_snap(v3[0][1]); - struct lp_bins *bins = lp_setup_get_current_bins(setup); - struct lp_rast_triangle *tri = lp_bin_alloc( bins, sizeof *tri ); + struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_rast_triangle *tri = lp_scene_alloc( scene, sizeof *tri ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -285,7 +285,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject to overflow?? */ if (area <= 0) { - lp_bin_putback_data( bins, sizeof *tri ); + lp_scene_putback_data( scene, sizeof *tri ); return; } @@ -297,7 +297,7 @@ do_triangle_ccw(struct setup_context *setup, if (tri->miny == tri->maxy || tri->minx == tri->maxx) { - lp_bin_putback_data( bins, sizeof *tri ); + lp_scene_putback_data( scene, sizeof *tri ); return; } @@ -407,8 +407,8 @@ do_triangle_ccw(struct setup_context *setup, { /* Triangle is contained in a single tile: */ - lp_bin_command( bins, minx, miny, lp_rast_triangle, - lp_rast_arg_triangle(tri) ); + lp_scene_bin_command( scene, minx, miny, lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } else { @@ -466,17 +466,17 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ - lp_bin_command( bins, x, y, - lp_rast_shade_tile, - lp_rast_arg_inputs(&tri->inputs) ); + lp_scene_bin_command( scene, x, y, + lp_rast_shade_tile, + lp_rast_arg_inputs(&tri->inputs) ); } else { in = 1; /* shade partial tile */ - lp_bin_command( bins, x, y, - lp_rast_triangle, - lp_rast_arg_triangle(tri) ); + lp_scene_bin_command( scene, x, y, + lp_rast_triangle, + lp_rast_arg_triangle(tri) ); } /* Iterate cx values across the region: -- cgit v1.2.3 From b06b3a492519de85a53604e8b72a201d284584ea Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 14 Dec 2009 11:49:23 -0700 Subject: llvmpipe: update file 
list in Makefile --- src/gallium/drivers/llvmpipe/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 7c4cf320b97..345326e33d7 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -6,8 +6,8 @@ LIBNAME = llvmpipe CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ - lp_bin.c \ - lp_bin_queue.c \ + lp_scene.c \ + lp_scene_queue.c \ lp_bld_alpha.c \ lp_bld_arit.c \ lp_bld_blend_aos.c \ -- cgit v1.2.3 From 6e2a93739e2bbd857c62e1c3959a9032d591717a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 14 Dec 2009 15:24:31 -0700 Subject: llvmpipe: fix broken lp_build_abs() --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index d27ef0de041..e7eb5f833b6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -591,7 +591,8 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + unsigned long absMask = ~(1 << (type.width - 1)); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); a = LLVMBuildAnd(bld->builder, a, mask, ""); a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); -- cgit v1.2.3 From 55879440d703bf9f5a4040d04a2f2cd024fa07c2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 14 Dec 2009 15:27:35 -0700 Subject: llvmpipe: fix broken TGSI_OPCODE_FRC codegen --- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 
'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index adc81569ed5..83ac25bb200 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -777,7 +777,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); tmp0 = lp_build_floor(&bld->base, src0); - tmp0 = lp_build_sub(&bld->base, tmp0, src0); + tmp0 = lp_build_sub(&bld->base, src0, tmp0); dst0[chan_index] = tmp0; } break; -- cgit v1.2.3 From 276b8523e82c36ec2def21d16fdf7f6a32a3bd37 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 15 Dec 2009 07:58:37 -0700 Subject: llvmpipe: use 1ULL to be ready for 64-bit arithmetic someday --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index e7eb5f833b6..f8260938f5a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -591,7 +591,7 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - unsigned long absMask = ~(1 << (type.width - 1)); + unsigned long long absMask = ~(1ULL << (type.width - 1)); LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); a = LLVMBuildAnd(bld->builder, a, mask, ""); -- cgit v1.2.3 From cdbcd96fdfe2c4d09e9b34cb083664d6b6e0558b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 15 Dec 2009 15:39:48 -0700 Subject: llvmpipe: tighten up an assertion --- src/gallium/drivers/llvmpipe/lp_rast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c 
index fd9cd67d859..ec87d907b81 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -358,8 +358,8 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? ~0 : 0; #endif - assert((x % 2) == 0); - assert((y % 2) == 0); + assert((x % 4) == 0); + assert((y % 4) == 0); ix = x % TILE_SIZE; iy = y % TILE_SIZE; -- cgit v1.2.3 From 2297bc9233be014b7b5aa037769209fbe9f6a66c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Dec 2009 12:32:45 -0700 Subject: llvmpipe: refactor lp_build_cmp() to use lp_build_compare() --- src/gallium/drivers/llvmpipe/lp_bld_logic.c | 81 +++++++++++++++++------------ src/gallium/drivers/llvmpipe/lp_bld_logic.h | 8 +++ 2 files changed, 56 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 9470f834fc7..d094a040d6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -42,16 +42,16 @@ /** - * Build code to compare two values 'a' and 'b' using the given func. - * \parm func one of PIPE_FUNC_x + * Build code to compare two values 'a' and 'b' of 'type' using the given func. 
+ * \param func one of PIPE_FUNC_x */ LLVMValueRef -lp_build_cmp(struct lp_build_context *bld, - unsigned func, - LLVMValueRef a, - LLVMValueRef b) +lp_build_compare(LLVMBuilderRef builder, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) { - const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); @@ -104,7 +104,7 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } if(swap) { @@ -117,11 +117,11 @@ lp_build_cmp(struct lp_build_context *bld, } args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); - res = lp_build_intrinsic(bld->builder, + res = lp_build_intrinsic(builder, "llvm.x86.sse.cmp.ps", vec_type, args, 3); - res = LLVMBuildBitCast(bld->builder, res, int_vec_type, ""); + res = LLVMBuildBitCast(builder, res, int_vec_type, ""); return res; } else if(util_cpu_caps.has_sse2) { @@ -161,7 +161,7 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } /* There are no signed byte and unsigned word/dword comparison @@ -171,8 +171,8 @@ lp_build_cmp(struct lp_build_context *bld, ((type.width == 8 && type.sign) || (type.width != 8 && !type.sign))) { LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); - a = LLVMBuildXor(bld->builder, a, msb, ""); - b = LLVMBuildXor(bld->builder, b, msb, ""); + a = LLVMBuildXor(builder, a, msb, ""); + b = LLVMBuildXor(builder, b, msb, ""); } if(table[func].swap) { @@ -185,14 +185,14 @@ lp_build_cmp(struct lp_build_context *bld, } if(table[func].eq) - res = lp_build_intrinsic(bld->builder, pcmpeq, vec_type, args, 2); + res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); else if (table[func].gt) - res = lp_build_intrinsic(bld->builder, pcmpgt, vec_type, args, 2); + res = 
lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); else res = LLVMConstNull(vec_type); if(table[func].not) - res = LLVMBuildNot(bld->builder, res, ""); + res = LLVMBuildNot(builder, res, ""); return res; } @@ -228,28 +228,28 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } #if 0 /* XXX: Although valid IR, no LLVM target currently support this */ - cond = LLVMBuildFCmp(bld->builder, op, a, b, ""); - res = LLVMBuildSelect(bld->builder, cond, ones, zeros, ""); + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSelect(builder, cond, ones, zeros, ""); #else debug_printf("%s: warning: using slow element-wise vector comparison\n", __FUNCTION__); res = LLVMGetUndef(int_vec_type); for(i = 0; i < type.length; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildFCmp(bld->builder, op, - LLVMBuildExtractElement(bld->builder, a, index, ""), - LLVMBuildExtractElement(bld->builder, b, index, ""), + cond = LLVMBuildFCmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), ""); - cond = LLVMBuildSelect(bld->builder, cond, + cond = LLVMBuildSelect(builder, cond, LLVMConstExtractElement(ones, index), LLVMConstExtractElement(zeros, index), ""); - res = LLVMBuildInsertElement(bld->builder, res, cond, index, ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); } #endif } @@ -276,28 +276,28 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } #if 0 /* XXX: Although valid IR, no LLVM target currently support this */ - cond = LLVMBuildICmp(bld->builder, op, a, b, ""); - res = LLVMBuildSelect(bld->builder, cond, ones, zeros, ""); + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSelect(builder, cond, ones, zeros, ""); #else - debug_printf("%s: warning: using slow element-wise vector comparison\n", + 
debug_printf("%s: warning: using slow element-wise int vector comparison\n", __FUNCTION__); res = LLVMGetUndef(int_vec_type); for(i = 0; i < type.length; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildICmp(bld->builder, op, - LLVMBuildExtractElement(bld->builder, a, index, ""), - LLVMBuildExtractElement(bld->builder, b, index, ""), + cond = LLVMBuildICmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), ""); - cond = LLVMBuildSelect(bld->builder, cond, + cond = LLVMBuildSelect(builder, cond, LLVMConstExtractElement(ones, index), LLVMConstExtractElement(zeros, index), ""); - res = LLVMBuildInsertElement(bld->builder, res, cond, index, ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); } #endif } @@ -306,6 +306,21 @@ lp_build_cmp(struct lp_build_context *bld, } + +/** + * Build code to compare two values 'a' and 'b' using the given func. + * \param func one of PIPE_FUNC_x + */ +LLVMValueRef +lp_build_cmp(struct lp_build_context *bld, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + return lp_build_compare(bld->builder, bld->type, func, a, b); +} + + LLVMValueRef lp_build_select(struct lp_build_context *bld, LLVMValueRef mask, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index a4ee7723b5f..d6876366561 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -46,6 +46,14 @@ struct lp_type type; struct lp_build_context; +LLVMValueRef +lp_build_compare(LLVMBuilderRef builder, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b); + + /** * @param func is one of PIPE_FUNC_xxx */ -- cgit v1.2.3 From e288796c92bb7d75cd6dfee968804c6230ef38d7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Dec 2009 12:33:11 -0700 Subject: llvmpipe: added lp_build_int32_vec4_type() --- 
src/gallium/drivers/llvmpipe/lp_bld_type.c | 21 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_type.h | 4 ++++ 2 files changed, 25 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 606243d6c5a..e8cf7256c0e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -157,6 +157,27 @@ lp_build_int_vec_type(struct lp_type type) } +/** + * Build int32[4] vector type + */ +LLVMTypeRef +lp_build_int32_vec4_type() +{ + struct lp_type t; + LLVMTypeRef type; + + memset(&t, 0, sizeof(t)); + t.floating = FALSE; /* values are integers */ + t.sign = TRUE; /* values are signed */ + t.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + t.width = 32; /* 32-bit int */ + t.length = 4; /* 4 elements per vector */ + + type = lp_build_int_elem_type(t); + return LLVMVectorType(type, t.length); +} + + struct lp_type lp_int_type(struct lp_type type) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index ee5ca3483c1..118fb339089 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -162,6 +162,10 @@ LLVMTypeRef lp_build_int_vec_type(struct lp_type type); +LLVMTypeRef +lp_build_int32_vec4_type(); + + struct lp_type lp_int_type(struct lp_type type); -- cgit v1.2.3 From ab9438193083b7f9a3180cb9cea45e269131048a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Dec 2009 16:02:59 -0700 Subject: llvmpipe: do the final pixel in/out triangle test in the fragment shader The test to determine which of the pixels in a 2x2 quad are inside the triangle is now done in the fragment shader rather than in the calling C code. This is a little faster but there's a few more things to do. Note that the step[] array elements are in a different order now. Rather than being in row-major order for the 4x4 grid, they're in "quad-major" order.
The setup of the step arrays is a little more complicated now. So is the coarse/intermediate tile test code, but some lookup tables help with that. Next steps: - early-cull 2x2 quads which are totally outside the triangle. - skip the in/out test for fully contained quads - make the in/out comparison code tighter/faster. --- src/gallium/drivers/llvmpipe/lp_jit.h | 9 +- src/gallium/drivers/llvmpipe/lp_rast.c | 76 +++------- src/gallium/drivers/llvmpipe/lp_rast.h | 11 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 11 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 222 +++++++++++++++------------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 49 +++--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 144 ++++++++++++++++-- 7 files changed, 302 insertions(+), 220 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 7eccb5da859..e8fb7d990f8 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -114,9 +114,14 @@ typedef void const void *a0, const void *dadx, const void *dady, - const uint32_t *mask, void *color, - void *depth); + void *depth, + const int32_t c1, + const int32_t c2, + const int32_t c3, + const int32_t *step1, + const int32_t *step2, + const int32_t *step3); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index ec87d907b81..b1bd27d3406 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include <limits.h> #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_cpu_detect.h" @@ -279,6 +280,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { + /* Set c1,c2,c3 to large values so the in/out test always passes */ + const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2; const struct
lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = rast->tasks[thread_index].x; const unsigned tile_y = rast->tasks[thread_index].y; @@ -296,7 +299,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, inputs, tile_x + x, tile_y + y, - mask); + c1, c2, c3); } @@ -308,58 +311,25 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - unsigned mask) + int32_t c1, int32_t c2, int32_t c3) { -#if 1 const struct lp_rast_state *state = rast->tasks[thread_index].current_state; struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; void *color; void *depth; - uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; unsigned ix, iy; int block_offset; +#ifdef DEBUG assert(state); /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); - /* mask: the rasterizer wants to treat pixels in 4x4 blocks, but - * the pixel shader wants to swizzle them into 4 2x2 quads. - * - * Additionally, the pixel shader wants masks as full dword ~0, - * while the rasterizer wants to pack per-pixel bits tightly. - */ -#if 0 - unsigned qx, qy; - for (qy = 0; qy < 2; ++qy) - for (qx = 0; qx < 2; ++qx) - for (iy = 0; iy < 2; ++iy) - for (ix = 0; ix < 2; ++ix) - masks[qy][qx][iy][ix] = mask & (1 << (qy*8+iy*4+qx*2+ix)) ? ~0 : 0; -#else - masks[0][0][0][0] = mask & (1 << (0*8+0*4+0*2+0)) ? ~0 : 0; - masks[0][0][0][1] = mask & (1 << (0*8+0*4+0*2+1)) ? ~0 : 0; - masks[0][0][1][0] = mask & (1 << (0*8+1*4+0*2+0)) ? ~0 : 0; - masks[0][0][1][1] = mask & (1 << (0*8+1*4+0*2+1)) ? ~0 : 0; - masks[0][1][0][0] = mask & (1 << (0*8+0*4+1*2+0)) ? ~0 : 0; - masks[0][1][0][1] = mask & (1 << (0*8+0*4+1*2+1)) ? ~0 : 0; - masks[0][1][1][0] = mask & (1 << (0*8+1*4+1*2+0)) ? ~0 : 0; - masks[0][1][1][1] = mask & (1 << (0*8+1*4+1*2+1)) ? ~0 : 0; - - masks[1][0][0][0] = mask & (1 << (1*8+0*4+0*2+0)) ? ~0 : 0; - masks[1][0][0][1] = mask & (1 << (1*8+0*4+0*2+1)) ? 
~0 : 0; - masks[1][0][1][0] = mask & (1 << (1*8+1*4+0*2+0)) ? ~0 : 0; - masks[1][0][1][1] = mask & (1 << (1*8+1*4+0*2+1)) ? ~0 : 0; - masks[1][1][0][0] = mask & (1 << (1*8+0*4+1*2+0)) ? ~0 : 0; - masks[1][1][0][1] = mask & (1 << (1*8+0*4+1*2+1)) ? ~0 : 0; - masks[1][1][1][0] = mask & (1 << (1*8+1*4+1*2+0)) ? ~0 : 0; - masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? ~0 : 0; -#endif - assert((x % 4) == 0); assert((y % 4) == 0); +#endif ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -373,39 +343,27 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /* depth buffer */ depth = tile->depth + block_offset; - /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ - assert(lp_check_alignment(masks, 16)); - +#ifdef DEBUG assert(lp_check_alignment(depth, 16)); assert(lp_check_alignment(color, 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); + assert(lp_check_alignment(inputs->step[0], 16)); + assert(lp_check_alignment(inputs->step[1], 16)); + assert(lp_check_alignment(inputs->step[2], 16)); +#endif + /* run shader */ state->jit_function( &state->jit_context, x, y, inputs->a0, inputs->dadx, inputs->dady, - &masks[0][0][0][0], color, - depth); -#else - struct lp_rast_tile *tile = &rast->tile; - unsigned chan_index; - unsigned q, ix, iy; - - x %= TILE_SIZE; - y %= TILE_SIZE; - - /* mask */ - for (q = 0; q < 4; ++q) - for(iy = 0; iy < 2; ++iy) - for(ix = 0; ix < 2; ++ix) - if(masks[q] & (1 << (iy*2 + ix))) - for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - TILE_PIXEL(tile->color, x + q*2 + ix, y + iy, chan_index) = 0xff; - -#endif + depth, + c1, c2, c3, + inputs->step[0], inputs->step[1], inputs->step[2] + ); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 2dd0193d8dc..46e22f69a61 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -80,6 +80,9 @@ struct lp_rast_shader_inputs { float (*a0)[4]; float (*dadx)[4]; float 
(*dady)[4]; + + /* edge/step info for 3 edges and 4x4 block of pixels */ + int ALIGN16_ATTRIB step[3][16]; }; @@ -117,14 +120,10 @@ struct lp_rast_triangle { int dx31; /* edge function values at minx,miny ?? */ - int c1; - int c2; - int c3; - - int step[3][16]; + int c1, c2, c3; /* inputs for the shader */ - struct lp_rast_shader_inputs inputs; + struct lp_rast_shader_inputs ALIGN16_ATTRIB inputs; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 79a90f6610c..cd72d7e69d8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -61,15 +61,6 @@ struct lp_rasterizer_task unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ - /* Pixel blocks produced during rasterization - */ - unsigned nr_blocks; - struct { - unsigned x; - unsigned y; - unsigned mask; - } blocks[256]; - const struct lp_rast_state *current_state; /** "back" pointer */ @@ -133,6 +124,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - unsigned masks); + int32_t c1, int32_t c2, int32_t c3); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 6c96010c52f..9b1861223ae 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -29,6 +29,7 @@ * Rasterization for binned triangles within a tile */ +#include <limits.h> #include "util/u_math.h" #include "lp_debug.h" #include "lp_rast_priv.h" @@ -36,42 +37,89 @@ /** - * Add a 4x4 block of pixels to the block list. - * All pixels are known to be inside the triangle's bounds. + * Map an index in [0,15] to an x,y position, multiplied by 4. + * This is used to get the position of each subtile in a 4x4 + * grid of edge step values.
+ */ +static const int pos_table4[16][2] = { + { 0, 0 }, + { 4, 0 }, + { 0, 4 }, + { 4, 4 }, + { 8, 0 }, + { 12, 0 }, + { 8, 4 }, + { 12, 4 }, + { 0, 8 }, + { 4, 8 }, + { 0, 12 }, + { 4, 12 }, + { 8, 8 }, + { 12, 8 }, + { 8, 12 }, + { 12, 12 } +}; + + +static const int pos_table16[16][2] = { + { 0, 0 }, + { 16, 0 }, + { 0, 16 }, + { 16, 16 }, + { 32, 0 }, + { 48, 0 }, + { 32, 16 }, + { 48, 16 }, + { 0, 32 }, + { 16, 32 }, + { 0, 48 }, + { 16, 48 }, + { 32, 32 }, + { 48, 32 }, + { 32, 48 }, + { 48, 48 } +}; + + +/** + * Shade all pixels in a 4x4 block. */ static void -block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) +block_full_4( struct lp_rasterizer_task *rast_task, + const struct lp_rast_triangle *tri, + int x, int y ) { - const unsigned i = rast_task->nr_blocks; - assert(x % 4 == 0); - assert(y % 4 == 0); - rast_task->blocks[i].x = x; - rast_task->blocks[i].y = y; - rast_task->blocks[i].mask = ~0; - rast_task->nr_blocks++; + /* Set c1,c2,c3 to large values so the in/out test always passes */ + const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2; + lp_rast_shade_quads(rast_task->rast, + rast_task->thread_index, + &tri->inputs, + x, y, + c1, c2, c3); } /** - * Add a 16x16 block of pixels to the block list. - * All pixels are known to be inside the triangle's bounds. + * Shade all pixels in a 16x16 block. */ static void -block_full_16( struct lp_rasterizer_task *rast_task, int x, int y ) +block_full_16( struct lp_rasterizer_task *rast_task, + const struct lp_rast_triangle *tri, + int x, int y ) { unsigned ix, iy; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy += 4) for (ix = 0; ix < 16; ix += 4) - block_full_4(rast_task, x + ix, y + iy); + block_full_4(rast_task, tri, x + ix, y + iy); } /** - * Evaluate each pixel in a 4x4 block to determine if it lies within - * the triangle's bounds. - * Generate a mask of in/out flags and add the block to the blocks list. + * Pass the 4x4 pixel block to the shader function. 
+ * Determination of which of the 16 pixels lies inside the triangle + * will be done as part of the fragment shader. */ static void do_block_4( struct lp_rasterizer_task *rast_task, @@ -81,28 +129,11 @@ do_block_4( struct lp_rasterizer_task *rast_task, int c2, int c3 ) { - int i; - unsigned mask = 0; - - assert(x % 4 == 0); - assert(y % 4 == 0); - - for (i = 0; i < 16; i++) { - int any_negative = ((c1 + tri->step[0][i]) | - (c2 + tri->step[1][i]) | - (c3 + tri->step[2][i])) >> 31; - mask |= (~any_negative) & (1 << i); - } - - /* As we do trivial reject already, masks should rarely be all zero: - */ - if (mask) { - const unsigned i = rast_task->nr_blocks; - rast_task->blocks[i].x = x; - rast_task->blocks[i].y = y; - rast_task->blocks[i].mask = mask; - rast_task->nr_blocks++; - } + lp_rast_shade_quads(rast_task->rast, + rast_task->thread_index, + &tri->inputs, + x, y, + c1, c2, c3); } @@ -118,40 +149,42 @@ do_block_16( struct lp_rasterizer_task *rast_task, int c2, int c3 ) { - int ix, iy, i = 0; + const int ei1 = tri->ei1 * 4; + const int ei2 = tri->ei2 * 4; + const int ei3 = tri->ei3 * 4; - int ei1 = tri->ei1 * 4; - int ei2 = tri->ei2 * 4; - int ei3 = tri->ei3 * 4; + const int eo1 = tri->eo1 * 4; + const int eo2 = tri->eo2 * 4; + const int eo3 = tri->eo3 * 4; - int eo1 = tri->eo1 * 4; - int eo2 = tri->eo2 * 4; - int eo3 = tri->eo3 * 4; + int i; assert(x % 16 == 0); assert(y % 16 == 0); - for (iy = 0; iy < 16; iy+=4) { - for (ix = 0; ix < 16; ix+=4, i++) { - int cx1 = c1 + (tri->step[0][i] * 4); - int cx2 = c2 + (tri->step[1][i] * 4); - int cx3 = c3 + (tri->step[2][i] * 4); - - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) { - /* the block is completely outside the triangle - nop */ - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) { + for (i = 0; i < 16; i++) { + int cx1 = c1 + (tri->inputs.step[0][i] * 4); + int cx2 = c2 + (tri->inputs.step[1][i] * 4); + int cx3 = c3 + (tri->inputs.step[2][i] * 4); + + if (cx1 + eo1 < 0 || + cx2 + eo2 
< 0 || + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ + } + else { + int px = x + pos_table4[i][0]; + int py = y + pos_table4[i][1]; + if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_4(rast_task, x+ix, y+iy); - } - else { + block_full_4(rast_task, tri, px, py); + } + else { /* the block is partially in/out of the triangle */ - do_block_4(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); - } + do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); + } } } } @@ -171,8 +204,7 @@ lp_rast_triangle( struct lp_rasterizer *rast, int x = rast_task->x; int y = rast_task->y; - int ix, iy; - unsigned i = 0; + unsigned i; int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; @@ -186,48 +218,36 @@ lp_rast_triangle( struct lp_rasterizer *rast, int eo2 = tri->eo2 * 16; int eo3 = tri->eo3 * 16; - assert(Elements(rast_task->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); - LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); - rast_task->nr_blocks = 0; - /* Walk over the tile to build a list of 4x4 pixel blocks which will * be filled/shaded. We do this at two granularities: 16x16 blocks * and then 4x4 blocks. 
*/ - for (iy = 0; iy < TILE_SIZE; iy += 16) { - for (ix = 0; ix < TILE_SIZE; ix += 16, i++) { - int cx1 = c1 + (tri->step[0][i] * 16); - int cx2 = c2 + (tri->step[1][i] * 16); - int cx3 = c3 + (tri->step[2][i] * 16); - - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) { - /* the block is completely outside the triangle - nop */ - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) { + for (i = 0; i < 16; i++) { + int cx1 = c1 + (tri->inputs.step[0][i] * 16); + int cx2 = c2 + (tri->inputs.step[1][i] * 16); + int cx3 = c3 + (tri->inputs.step[2][i] * 16); + + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ + } + else { + int px = x + pos_table16[i][0]; + int py = y + pos_table16[i][1]; + + if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_16(rast_task, x+ix, y+iy); - } - else { + block_full_16(rast_task, tri, px, py); + } + else { /* the block is partially in/out of the triangle */ - do_block_16(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); - } + do_block_16(rast_task, tri, px, py, cx1, cx2, cx3); + } } } - - assert(rast_task->nr_blocks <= Elements(rast_task->blocks)); - - /* Shade the 4x4 pixel blocks */ - for (i = 0; i < rast_task->nr_blocks; i++) - lp_rast_shade_quads(rast, - thread_index, - &tri->inputs, - rast_task->blocks[i].x, - rast_task->blocks[i].y, - rast_task->blocks[i].mask); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index aeaf260af27..e15b987767c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -265,7 +265,7 @@ do_triangle_ccw(struct setup_context *setup, const int y3 = subpixel_snap(v3[0][1]); struct lp_scene *scene = lp_setup_get_current_scene(setup); - struct lp_rast_triangle *tri = lp_scene_alloc( scene, sizeof *tri ); + struct lp_rast_triangle *tri = 
lp_scene_alloc_aligned( scene, sizeof *tri, 16 ); float area, oneoverarea; int minx, maxx, miny, maxy; @@ -354,38 +354,29 @@ do_triangle_ccw(struct setup_context *setup, tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; { - int xstep1 = -tri->dy12; - int xstep2 = -tri->dy23; - int xstep3 = -tri->dy31; + const int xstep1 = -tri->dy12; + const int xstep2 = -tri->dy23; + const int xstep3 = -tri->dy31; - int ystep1 = tri->dx12; - int ystep2 = tri->dx23; - int ystep3 = tri->dx31; + const int ystep1 = tri->dx12; + const int ystep2 = tri->dx23; + const int ystep3 = tri->dx31; - int ix, iy; + int qx, qy, ix, iy; int i = 0; - int c1 = 0; - int c2 = 0; - int c3 = 0; - - for (iy = 0; iy < 4; iy++) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; - - for (ix = 0; ix < 4; ix++, i++) { - tri->step[0][i] = cx1; - tri->step[1][i] = cx2; - tri->step[2][i] = cx3; - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; + for (qy = 0; qy < 2; qy++) { + for (qx = 0; qx < 2; qx++) { + for (iy = 0; iy < 2; iy++) { + for (ix = 0; ix < 2; ix++, i++) { + int x = qx * 2 + ix; + int y = qy * 2 + iy; + tri->inputs.step[0][i] = x * xstep1 + y * ystep1; + tri->inputs.step[1][i] = x * xstep2 + y * ystep2; + tri->inputs.step[2][i] = x * xstep3 + y * ystep3; + } + } + } } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c0d5a70a553..4af37e365ec 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -175,8 +175,93 @@ generate_depth(LLVMBuilderRef builder, } +/** + * Generate the code to do inside/outside triangle testing for the + * four pixels in a 2x2 quad. This will set the four elements of the + * quad mask vector to 0 or ~0. 
+ * \param i which quad of the quad group to test, in [0,3] + */ +static void +generate_tri_edge_mask(LLVMBuilderRef builder, + unsigned i, + LLVMValueRef *mask, /* ivec4, out */ + LLVMValueRef c0, /* int32 */ + LLVMValueRef c1, /* int32 */ + LLVMValueRef c2, /* int32 */ + LLVMValueRef step0_ptr, /* ivec4 */ + LLVMValueRef step1_ptr, /* ivec4 */ + LLVMValueRef step2_ptr) /* ivec4 */ +{ + /* + c0_vec = splat(c0) + c1_vec = splat(c1) + c2_vec = splat(c2) + s0_vec = c0_vec + step0_ptr[i] + s1_vec = c1_vec + step1_ptr[i] + s2_vec = c2_vec + step2_ptr[i] + m0_vec = s0_vec > {0,0,0,0} + m1_vec = s1_vec > {0,0,0,0} + m2_vec = s2_vec > {0,0,0,0} + mask = m0_vec & m1_vec & m2_vec + */ + struct lp_type i32_type; + LLVMTypeRef i32vec4_type; + + LLVMValueRef index; + LLVMValueRef c0_vec, c1_vec, c2_vec; + LLVMValueRef step0_vec, step1_vec, step2_vec; + LLVMValueRef m0_vec, m1_vec, m2_vec; + LLVMValueRef s0_vec, s1_vec, s2_vec; + LLVMValueRef m; + + LLVMValueRef zeros; + + assert(i < 4); + + /* int32 vector type */ + memset(&i32_type, 0, sizeof i32_type); + i32_type.floating = FALSE; /* values are integers */ + i32_type.sign = TRUE; /* values are signed */ + i32_type.norm = FALSE; /* values are not normalized */ + i32_type.width = 32; /* 32-bit int values */ + i32_type.length = 4; /* 4 elements per vector */ + + i32vec4_type = lp_build_int32_vec4_type(); + + /* int32_vec4 zero = {0,0,0,0} */ + zeros = LLVMConstNull(i32vec4_type); + + c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); + c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); + c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); + + index = LLVMConstInt(LLVMInt32Type(), i, 0); + step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); + step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); + step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + + /** XXX with a little work, we could remove the add here 
and just + * compare c0_vec > step0_vec. + */ + s0_vec = LLVMBuildAdd(builder, c0_vec, step0_vec, ""); + s1_vec = LLVMBuildAdd(builder, c1_vec, step1_vec, ""); + s2_vec = LLVMBuildAdd(builder, c2_vec, step2_vec, ""); + m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s0_vec, zeros); + m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s1_vec, zeros); + m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s2_vec, zeros); + + m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); + m = LLVMBuildAnd(builder, m, m2_vec, ""); + + lp_build_name(m, "m"); + + *mask = m; +} + + /** * Generate the fragment shader, depth/stencil test, and alpha tests. + * \param i which quad in the tile, in range [0,3] */ static void generate_fs(struct llvmpipe_context *lp, @@ -190,7 +275,13 @@ generate_fs(struct llvmpipe_context *lp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef *color, - LLVMValueRef depth_ptr) + LLVMValueRef depth_ptr, + LLVMValueRef c0, + LLVMValueRef c1, + LLVMValueRef c2, + LLVMValueRef step0_ptr, + LLVMValueRef step1_ptr, + LLVMValueRef step2_ptr) { const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef elem_type; @@ -205,6 +296,8 @@ generate_fs(struct llvmpipe_context *lp, unsigned attrib; unsigned chan; + assert(i < 4); + elem_type = lp_build_elem_type(type); vec_type = lp_build_vec_type(type); int_vec_type = lp_build_int_vec_type(type); @@ -224,8 +317,13 @@ generate_fs(struct llvmpipe_context *lp, } lp_build_flow_scope_declare(flow, &z); + /* do triangle edge testing */ + generate_tri_edge_mask(builder, i, pmask, + c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + lp_build_mask_begin(&mask, flow, type, *pmask); + early_depth_test = key->depth.enabled && !key->alpha.enabled && @@ -376,17 +474,18 @@ generate_fragment(struct llvmpipe_context *lp, LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; - LLVMTypeRef arg_types[9]; + LLVMTypeRef arg_types[14]; LLVMTypeRef 
func_type; + LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; - LLVMValueRef mask_ptr; LLVMValueRef color_ptr; LLVMValueRef depth_ptr; + LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; @@ -468,9 +567,17 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ - arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */ - arg_types[7] = LLVMPointerType(blend_vec_type, 0); /* color */ - arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[6] = LLVMPointerType(blend_vec_type, 0); /* color */ + arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[8] = LLVMInt32Type(); /* c0 */ + arg_types[9] = LLVMInt32Type(); /* c1 */ + arg_types[10] = LLVMInt32Type(); /* c2 */ + /* Note: the step arrays are built as int32[16] but we interpret + * them here as int32_vec4[4]. 
+ */ + arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ + arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ + arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); @@ -486,9 +593,14 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr = LLVMGetParam(variant->function, 3); dadx_ptr = LLVMGetParam(variant->function, 4); dady_ptr = LLVMGetParam(variant->function, 5); - mask_ptr = LLVMGetParam(variant->function, 6); - color_ptr = LLVMGetParam(variant->function, 7); - depth_ptr = LLVMGetParam(variant->function, 8); + color_ptr = LLVMGetParam(variant->function, 6); + depth_ptr = LLVMGetParam(variant->function, 7); + c0 = LLVMGetParam(variant->function, 8); + c1 = LLVMGetParam(variant->function, 9); + c2 = LLVMGetParam(variant->function, 10); + step0_ptr = LLVMGetParam(variant->function, 11); + step1_ptr = LLVMGetParam(variant->function, 12); + step2_ptr = LLVMGetParam(variant->function, 13); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -496,9 +608,14 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); - lp_build_name(mask_ptr, "mask"); lp_build_name(color_ptr, "color"); lp_build_name(depth_ptr, "depth"); + lp_build_name(c0, "c0"); + lp_build_name(c1, "c1"); + lp_build_name(c2, "c2"); + lp_build_name(step0_ptr, "step0"); + lp_build_name(step1_ptr, "step1"); + lp_build_name(step2_ptr, "step2"); /* * Function body @@ -526,7 +643,6 @@ generate_fragment(struct llvmpipe_context *lp, if(i != 0) lp_build_interp_soa_update(&interp, i); - fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), ""); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, @@ -536,9 +652,11 @@ generate_fragment(struct llvmpipe_context *lp, i, &interp, sampler, - &fs_mask[i], + &fs_mask[i], /* output */ 
out_color, - depth_ptr_i); + depth_ptr_i, + c0, c1, c2, + step0_ptr, step1_ptr, step2_ptr); for(chan = 0; chan < NUM_CHANNELS; ++chan) fs_out_color[chan][i] = out_color[chan]; -- cgit v1.2.3 From 7f2ba80025e4b534db72427a206e6a542fc2f520 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Dec 2009 11:29:37 +0000 Subject: llvmpipe: keep copy of framebuffer state in setup context Avoids crashes when first frame is rendered before window is mapped. Avoids potential issue where fb state is changed before setup context is flushed. --- src/gallium/drivers/llvmpipe/lp_setup.c | 24 ++++++++++++------------ src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 +- src/gallium/drivers/llvmpipe/lp_state_surface.c | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 76e09552377..e361e5df63a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -36,6 +36,7 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_surface.h" #include "lp_scene.h" #include "lp_scene_queue.h" #include "lp_debug.h" @@ -61,10 +62,9 @@ lp_setup_get_current_scene(struct setup_context *setup) setup->scene = lp_scene_dequeue(setup->empty_scenes); if(0)lp_scene_reset( setup->scene ); /* XXX temporary? 
*/ - if (setup->fb) { - lp_scene_set_framebuffer_size(setup->scene, - setup->fb->width, setup->fb->height); - } + lp_scene_set_framebuffer_size(setup->scene, + setup->fb.width, + setup->fb.height); } return setup->scene; } @@ -134,9 +134,9 @@ lp_setup_rasterize_scene( struct setup_context *setup, struct lp_scene *scene = lp_setup_get_current_scene(setup); lp_rasterize_scene(setup->rast, - scene, - setup->fb, - write_depth); + scene, + &setup->fb, + write_depth); reset_context( setup ); @@ -152,7 +152,7 @@ begin_binning( struct setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (setup->fb->cbufs[0]) { + if (setup->fb.cbufs[0]) { if (setup->clear.flags & PIPE_CLEAR_COLOR) lp_scene_bin_everywhere( scene, lp_rast_clear_color, @@ -163,7 +163,7 @@ begin_binning( struct setup_context *setup ) lp_rast_arg_null() ); } - if (setup->fb->zsbuf) { + if (setup->fb.zsbuf) { if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, @@ -248,9 +248,9 @@ lp_setup_bind_framebuffer( struct setup_context *setup, set_state( setup, SETUP_FLUSHED ); - setup->fb = fb; + util_copy_framebuffer_state(&setup->fb, fb); - lp_scene_set_framebuffer_size(scene, setup->fb->width, setup->fb->height); + lp_scene_set_framebuffer_size(scene, setup->fb.width, setup->fb.height); } @@ -274,7 +274,7 @@ lp_setup_clear( struct setup_context *setup, if (flags & PIPE_CLEAR_DEPTHSTENCIL) { setup->clear.zstencil.clear_zstencil = - util_pack_z_stencil(setup->fb->zsbuf->format, + util_pack_z_stencil(setup->fb.zsbuf->format, depth, stencil); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 180d9eca84b..f6604a8034a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -65,7 +65,7 @@ struct setup_context { boolean ccw_is_frontface; unsigned cullmode; - const struct pipe_framebuffer_state *fb; + struct 
pipe_framebuffer_state fb; struct { unsigned flags; diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 21565436eb6..957e947fe02 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -68,7 +68,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, draw_set_mrd(lp->draw, mrd); } - lp_setup_bind_framebuffer( lp->setup, fb ); + lp_setup_bind_framebuffer( lp->setup, &lp->framebuffer ); lp->dirty |= LP_NEW_FRAMEBUFFER; } -- cgit v1.2.3 From b9d33db0a4cb818154b713a27834f66025b14672 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Dec 2009 17:08:13 -0700 Subject: llvmpipe: improve the in/out test a little Instead of: s = c + step m = s > 0 Do: m = step > c (with negated c) --- src/gallium/drivers/llvmpipe/lp_rast.c | 3 +-- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 24 ++++++------------------ 3 files changed, 9 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index b1bd27d3406..015865a6d6f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -281,11 +281,10 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2; + const int32_t c1 = INT_MIN/2, c2 = INT_MIN/2, c3 = INT_MIN/2; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = rast->tasks[thread_index].x; const unsigned tile_y = rast->tasks[thread_index].y; - const unsigned mask = ~0; unsigned x, y; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 9b1861223ae..d6e8d6d5ab5 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -90,7 +90,7 @@ block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) { /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2; + const int32_t c1 = INT_MIN/2, c2 = INT_MIN/2, c3 = INT_MIN/2; lp_rast_shade_quads(rast_task->rast, rast_task->thread_index, &tri->inputs, @@ -133,7 +133,7 @@ do_block_4( struct lp_rasterizer_task *rast_task, rast_task->thread_index, &tri->inputs, x, y, - c1, c2, c3); + -c1, -c2, -c3); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 4af37e365ec..15b175a2c47 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -196,12 +196,9 @@ generate_tri_edge_mask(LLVMBuilderRef builder, c0_vec = splat(c0) c1_vec = splat(c1) c2_vec = splat(c2) - s0_vec = c0_vec + step0_ptr[i] - s1_vec = c1_vec + step1_ptr[i] - s2_vec = c2_vec + step2_ptr[i] - m0_vec = s0_vec > {0,0,0,0} - m1_vec = s1_vec > {0,0,0,0} - m2_vec = s2_vec > {0,0,0,0} + m0_vec = step0_ptr[i] > c0_vec + m1_vec = step1_ptr[i] > c1_vec + m2_vec = step2_ptr[i] > c2_vec mask = m0_vec & m1_vec & m2_vec */ struct lp_type i32_type; @@ -211,7 +208,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef c0_vec, c1_vec, c2_vec; LLVMValueRef step0_vec, step1_vec, step2_vec; LLVMValueRef m0_vec, m1_vec, m2_vec; - LLVMValueRef s0_vec, s1_vec, s2_vec; LLVMValueRef m; LLVMValueRef zeros; @@ -240,21 +236,13 @@ generate_tri_edge_mask(LLVMBuilderRef builder, step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); - /** XXX with a little work, we could remove the add here and just - * compare c0_vec > step0_vec. 
- */ - s0_vec = LLVMBuildAdd(builder, c0_vec, step0_vec, ""); - s1_vec = LLVMBuildAdd(builder, c1_vec, step1_vec, ""); - s2_vec = LLVMBuildAdd(builder, c2_vec, step2_vec, ""); - m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s0_vec, zeros); - m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s1_vec, zeros); - m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s2_vec, zeros); + m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); + m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); + m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); m = LLVMBuildAnd(builder, m, m2_vec, ""); - lp_build_name(m, "m"); - *mask = m; } -- cgit v1.2.3 From 808170a0ff6c3a51a1b69a54ed8045b2e0f7d0d1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Dec 2009 09:00:58 -0700 Subject: llvmpipe: replace INT_MIN/2 with INT_MIN Since changing the in/out test we can just use INT_MIN to be sure the comparison against the step values always passes. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 015865a6d6f..24393c8e891 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -281,7 +281,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MIN/2, c2 = INT_MIN/2, c3 = INT_MIN/2; + const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = rast->tasks[thread_index].x; const unsigned tile_y = rast->tasks[thread_index].y; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index d6e8d6d5ab5..bc7397f50c5 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -90,7 +90,7 @@ block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) { /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MIN/2, c2 = INT_MIN/2, c3 = INT_MIN/2; + const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN; lp_rast_shade_quads(rast_task->rast, rast_task->thread_index, &tri->inputs, -- cgit v1.2.3 From 5771f3d483e882d9f5b6c5f3bdb3c39696623b66 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Dec 2009 10:52:50 -0700 Subject: llvmpipe: remove unused code, added comments, etc --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 15b175a2c47..7ed727dbbce 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -210,8 +210,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef m0_vec, m1_vec, m2_vec; LLVMValueRef m; - LLVMValueRef zeros; - assert(i < 4); /* int32 vector type */ @@ -224,18 +222,27 @@ generate_tri_edge_mask(LLVMBuilderRef builder, i32vec4_type = lp_build_int32_vec4_type(); - /* int32_vec4 zero = {0,0,0,0} */ - zeros = LLVMConstNull(i32vec4_type); - + /* c0_vec = {c0, c0, c0, c0} + * Note that we emit this code four times but LLVM optimizes away + * three instances of it. + */ c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); + lp_build_name(c0_vec, "edgeconst0vec"); + lp_build_name(c1_vec, "edgeconst1vec"); + lp_build_name(c2_vec, "edgeconst2vec"); + index = LLVMConstInt(LLVMInt32Type(), i, 0); step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + lp_build_name(step0_vec, "step0vec"); + lp_build_name(step1_vec, "step1vec"); + lp_build_name(step2_vec, "step2vec"); + m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); @@ -243,7 +250,13 @@ generate_tri_edge_mask(LLVMBuilderRef builder, m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); m = LLVMBuildAnd(builder, m, m2_vec, ""); + lp_build_name(m, "inoutmaskvec"); + *mask = m; + + /* + * if mask = {0,0,0,0} skip quad + */ } @@ -309,6 +322,7 @@ generate_fs(struct llvmpipe_context *lp, generate_tri_edge_mask(builder, i, pmask, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + /* 'mask' will control execution based on quad's pixel 
alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); -- cgit v1.2.3 From 7d9b97703aba0c751e2cf10025859cbfe66074b4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Dec 2009 14:22:43 -0700 Subject: llvmpipe: added function comments --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index dcc25fbff86..420d062fc72 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -402,6 +402,13 @@ lp_build_mask_check(struct lp_build_mask_context *mask) } +/** + * Begin a section of code which is predicated on a mask. + * \param mask the mask context, initialized here + * \param flow the flow context + * \param type the type of the mask + * \param value storage for the mask + */ void lp_build_mask_begin(struct lp_build_mask_context *mask, struct lp_build_flow_context *flow, @@ -422,6 +429,11 @@ lp_build_mask_begin(struct lp_build_mask_context *mask, } +/** + * Update boolean mask with given value (bitwise AND). + * Typically used to update the quad's pixel alive/killed mask + * after depth testing, alpha testing, TGSI_OPCODE_KIL, etc. + */ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) @@ -432,6 +444,9 @@ lp_build_mask_update(struct lp_build_mask_context *mask, } +/** + * End section of code which is predicated on a mask. 
+ */ LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask) { -- cgit v1.2.3 From aeb6351a0961534e77771b962c296485b98b79fe Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Dec 2009 14:26:48 -0700 Subject: llvmpipe: fix upper/lower-case typo --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 420d062fc72..fe9c6941f74 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -46,7 +46,7 @@ * Enumeration of all possible flow constructs. */ enum lp_build_flow_construct_kind { - lP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SCOPE, LP_BUILD_FLOW_SKIP, }; @@ -200,7 +200,7 @@ lp_build_flow_scope_begin(struct lp_build_flow_context *flow) { struct lp_build_flow_scope *scope; - scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope; + scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; if(!scope) return; @@ -241,7 +241,7 @@ lp_build_flow_scope_declare(struct lp_build_flow_context *flow, { struct lp_build_flow_scope *scope; - scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope; + scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; if(!scope) return; @@ -263,7 +263,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) { struct lp_build_flow_scope *scope; - scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope; + scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; if(!scope) return; -- cgit v1.2.3 From 5ce0380a0f585b9e1fb616b749f7fd18a8afada1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 16:44:43 +0000 Subject: llvmpipe: merge setup and draw vbuf submodules The setup tiling engine is now plugged directly into the draw module as a rendering backend. 
Removed a couple of layering violations such that the setup code no longer reaches out into the surrounding llvmpipe state or context. --- src/gallium/drivers/llvmpipe/Makefile | 2 +- src/gallium/drivers/llvmpipe/SConscript | 2 +- src/gallium/drivers/llvmpipe/lp_context.c | 18 +- src/gallium/drivers/llvmpipe/lp_context.h | 15 - src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 2 - src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 559 ------------------------ src/gallium/drivers/llvmpipe/lp_prim_vbuf.h | 38 -- src/gallium/drivers/llvmpipe/lp_setup.c | 111 ++--- src/gallium/drivers/llvmpipe/lp_setup.h | 34 +- src/gallium/drivers/llvmpipe/lp_setup_context.h | 29 +- src/gallium/drivers/llvmpipe/lp_setup_vbuf.c | 520 ++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_state_derived.c | 223 +++------- src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 +- 13 files changed, 698 insertions(+), 858 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_vbuf.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_vbuf.h create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_vbuf.c (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 345326e33d7..6ec97046e15 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -35,13 +35,13 @@ C_SOURCES = \ lp_fence.c \ lp_flush.c \ lp_jit.c \ - lp_prim_vbuf.c \ lp_rast.c \ lp_rast_tri.c \ lp_setup.c \ lp_setup_line.c \ lp_setup_point.c \ lp_setup_tri.c \ + lp_setup_vbuf.c \ lp_query.c \ lp_screen.c \ lp_state_blend.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index f0b71ef3eee..ae4303bd24f 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -46,7 +46,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_fence.c', 'lp_flush.c', 'lp_jit.c', - 'lp_prim_vbuf.c', 'lp_query.c', 'lp_scene.c', 'lp_scene_queue.c', @@ -55,6 +54,7 @@ llvmpipe = 
env.ConvenienceLibrary( 'lp_setup_line.c', 'lp_setup_point.c', 'lp_setup_tri.c', + 'lp_setup_vbuf.c', 'lp_state_blend.c', 'lp_state_clip.c', 'lp_state_derived.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 06aa0325403..0457ccc8a94 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -31,14 +31,12 @@ */ #include "draw/draw_context.h" -#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" -#include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" #include "lp_texture.h" @@ -179,23 +177,11 @@ llvmpipe_create( struct pipe_screen *screen ) if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - llvmpipe->setup = lp_setup_create( screen ); + llvmpipe->setup = lp_setup_create( screen, + llvmpipe->draw ); if (!llvmpipe->setup) goto fail; - llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); - if (!llvmpipe->vbuf_backend) - goto fail; - - llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend); - if (!llvmpipe->vbuf) - goto fail; - - draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf); - draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend); - - - /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 17c6939ff5b..b796148457e 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -93,17 +93,6 @@ struct llvmpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - /* The reduced version of the primitive supplied by the state - * tracker. 
- */ - unsigned reduced_api_prim; - - /* The reduced primitive after unfilled triangles, wide-line - * decomposition, etc, are taken into account. This is the - * primitive actually rasterized. - */ - unsigned reduced_prim; - /** Derived from scissor and surface bounds: */ struct pipe_scissor_state cliprect; @@ -113,10 +102,6 @@ struct llvmpipe_context { /** The primitive drawing context */ struct draw_context *draw; - /** Draw module backend */ - struct vbuf_render *vbuf_backend; - struct draw_stage *vbuf; - unsigned tex_timestamp; boolean no_rast; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index b879b5e755e..91fcbc01c6d 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -70,8 +70,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, struct draw_context *draw = lp->draw; unsigned i; - lp->reduced_api_prim = u_reduced_prim(mode); - if (lp->dirty) llvmpipe_update_derived( lp ); diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c deleted file mode 100644 index 925e6f8b3bd..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ /dev/null @@ -1,559 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Interface between 'draw' module's output and the llvmpipe rasterizer/setup - * code. When the 'draw' module has finished filling a vertex buffer, the - * draw_arrays() functions below will be called. Loop over the vertices and - * call the point/line/tri setup functions. - * - * Authors - * Brian Paul - */ - - -#include "lp_context.h" -#include "lp_state.h" -#include "lp_prim_vbuf.h" -#include "lp_setup.h" -#include "draw/draw_context.h" -#include "draw/draw_vbuf.h" -#include "util/u_memory.h" -#include "util/u_prim.h" - - -#define LP_MAX_VBUF_INDEXES 1024 -#define LP_MAX_VBUF_SIZE 4096 - -typedef const float (*cptrf4)[4]; - -/** - * Subclass of vbuf_render. 
- */ -struct llvmpipe_vbuf_render -{ - struct vbuf_render base; - struct llvmpipe_context *llvmpipe; - struct setup_context *setup; - - uint prim; - uint vertex_size; - uint nr_vertices; - uint vertex_buffer_size; - void *vertex_buffer; -}; - - -/** cast wrapper */ -static struct llvmpipe_vbuf_render * -llvmpipe_vbuf_render(struct vbuf_render *vbr) -{ - return (struct llvmpipe_vbuf_render *) vbr; -} - - - - - - - -static const struct vertex_info * -lp_vbuf_get_vertex_info(struct vbuf_render *vbr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - return llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe); -} - - -static boolean -lp_vbuf_allocate_vertices(struct vbuf_render *vbr, - ushort vertex_size, ushort nr_vertices) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - unsigned size = vertex_size * nr_vertices; - - if (cvbr->vertex_buffer_size < size) { - align_free(cvbr->vertex_buffer); - cvbr->vertex_buffer = align_malloc(size, 16); - cvbr->vertex_buffer_size = size; - } - - cvbr->vertex_size = vertex_size; - cvbr->nr_vertices = nr_vertices; - - return cvbr->vertex_buffer != NULL; -} - -static void -lp_vbuf_release_vertices(struct vbuf_render *vbr) -{ - /* keep the old allocation for next time */ -} - -static void * -lp_vbuf_map_vertices(struct vbuf_render *vbr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - return cvbr->vertex_buffer; -} - -static void -lp_vbuf_unmap_vertices(struct vbuf_render *vbr, - ushort min_index, - ushort max_index ) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); - /* do nothing */ -} - - -static boolean -lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - - llvmpipe_update_derived( cvbr->llvmpipe ); - - cvbr->llvmpipe->reduced_prim = u_reduced_prim(prim); - cvbr->prim = prim; - return TRUE; - -} - - -static 
INLINE cptrf4 get_vert( const void *vertex_buffer, - int index, - int stride ) -{ - return (cptrf4)((char *)vertex_buffer + index * stride); -} - - -/** - * draw elements / indexed primitives - */ -static void -lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; - const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); - const void *vertex_buffer = cvbr->vertex_buffer; - struct setup_context *setup_ctx = cvbr->setup; - unsigned i; - - switch (cvbr->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < nr; i++) { - lp_setup_point( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 1; i < nr; i += 2) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < nr; i ++) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - for (i = 1; i < nr; i ++) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - if (nr) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[nr-1], stride), - get_vert(vertex_buffer, indices[0], stride) ); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 3) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-2], stride) ); - } - } - else { - for (i = 2; i < nr; i += 3) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, 
indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i+(i&1)-1], stride), - get_vert(vertex_buffer, indices[i-(i&1)], stride), - get_vert(vertex_buffer, indices[i-2], stride) ); - } - } - else { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i+(i&1)-2], stride), - get_vert(vertex_buffer, indices[i-(i&1)-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[0], stride), - get_vert(vertex_buffer, indices[i-1], stride) ); - } - } - else { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_QUADS: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 4) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; 
- - case PIPE_PRIM_QUAD_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 2) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride)); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_POLYGON: - /* Almost same as tri fan but the _first_ vertex specifies the flat - * shading color. Note that the first polygon vertex is passed as - * the last triangle vertex here. - * flatshade_first state makes no difference. - */ - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[0], stride) ); - } - break; - - default: - assert(0); - } -} - - -/** - * This function is hit when the draw module is working in pass-through mode. - * It's up to us to convert the vertex array into point/line/tri prims. 
- */ -static void -lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; - struct setup_context *setup_ctx = cvbr->setup; - const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); - const void *vertex_buffer = - (void *) get_vert(cvbr->vertex_buffer, start, stride); - unsigned i; - - switch (cvbr->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < nr; i++) { - lp_setup_point( setup_ctx, - get_vert(vertex_buffer, i-0, stride) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 1; i < nr; i += 2) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < nr; i ++) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - for (i = 1; i < nr; i ++) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - if (nr) { - lp_setup_line( setup_ctx, - get_vert(vertex_buffer, nr-1, stride), - get_vert(vertex_buffer, 0, stride) ); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 3) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-2, stride) ); - } - } - else { - for (i = 2; i < nr; i += 3) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i++) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i+(i&1)-1, stride), - get_vert(vertex_buffer, i-(i&1), stride), - get_vert(vertex_buffer, 
i-2, stride) ); - } - } - else { - for (i = 2; i < nr; i++) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i+(i&1)-2, stride), - get_vert(vertex_buffer, i-(i&1)-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, 0, stride), - get_vert(vertex_buffer, i-1, stride) ); - } - } - else { - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, 0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_QUADS: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 4) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 2) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-3, 
stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_POLYGON: - /* Almost same as tri fan but the _first_ vertex specifies the flat - * shading color. Note that the first polygon vertex is passed as - * the last triangle vertex here. - * flatshade_first state makes no difference. - */ - for (i = 2; i < nr; i += 1) { - lp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, 0, stride) ); - } - break; - - default: - assert(0); - } -} - - - -static void -lp_vbuf_destroy(struct vbuf_render *vbr) -{ - FREE(vbr); -} - - -/** - * Create the post-transform vertex handler for the given context. - */ -struct vbuf_render * -lp_create_vbuf_backend(struct llvmpipe_context *lp) -{ - struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); - - assert(lp->draw); - assert(lp->setup); - - - cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; - cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - - cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info; - cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices; - cvbr->base.map_vertices = lp_vbuf_map_vertices; - cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices; - cvbr->base.set_primitive = lp_vbuf_set_primitive; - cvbr->base.draw = lp_vbuf_draw; - cvbr->base.draw_arrays = lp_vbuf_draw_arrays; - cvbr->base.release_vertices = lp_vbuf_release_vertices; - cvbr->base.destroy = lp_vbuf_destroy; - - cvbr->llvmpipe = lp; - cvbr->setup = lp->setup; - - return &cvbr->base; -} diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h deleted file mode 100644 index 0676e2f42ac..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h +++ /dev/null @@ -1,38 +0,0 @@ 
-/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef LP_VBUF_H -#define LP_VBUF_H - - -struct llvmpipe_context; - -extern struct vbuf_render * -lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe); - - -#endif /* LP_VBUF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index e361e5df63a..e2b21aed473 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -39,19 +39,22 @@ #include "util/u_surface.h" #include "lp_scene.h" #include "lp_scene_queue.h" -#include "lp_debug.h" -#include "lp_fence.h" -#include "lp_state.h" #include "lp_buffer.h" #include "lp_texture.h" +#include "lp_debug.h" +#include "lp_fence.h" +#include "lp_rast.h" #include "lp_setup_context.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" + /** XXX temporary value, temporary here */ #define MAX_SCENES 2 -static void set_state( struct setup_context *, unsigned ); +static void set_scene_state( struct setup_context *, unsigned ); struct lp_scene * @@ -76,7 +79,7 @@ first_triangle( struct setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { - set_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); lp_setup_choose_triangle( setup ); setup->triangle( setup, v0, v1, v2 ); } @@ -86,7 +89,7 @@ first_line( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { - set_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); lp_setup_choose_line( setup ); setup->line( setup, v0, v1 ); } @@ -95,7 +98,7 @@ static void first_point( struct setup_context *setup, const float (*v0)[4]) { - set_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); lp_setup_choose_point( setup ); setup->point( setup, v0 ); } @@ -194,7 +197,7 @@ execute_clears( struct setup_context *setup ) static void -set_state( struct setup_context *setup, +set_scene_state( struct setup_context *setup, unsigned 
new_state ) { unsigned old_state = setup->state; @@ -234,7 +237,7 @@ lp_setup_flush( struct setup_context *setup, { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - set_state( setup, SETUP_FLUSHED ); + set_scene_state( setup, SETUP_FLUSHED ); } @@ -246,7 +249,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - set_state( setup, SETUP_FLUSHED ); + set_scene_state( setup, SETUP_FLUSHED ); util_copy_framebuffer_state(&setup->fb, fb); @@ -302,7 +305,7 @@ lp_setup_clear( struct setup_context *setup, * buffers which the app or state-tracker might issue * separately. */ - set_state( setup, SETUP_CLEARED ); + set_scene_state( setup, SETUP_CLEARED ); setup->clear.flags |= flags; } @@ -321,7 +324,7 @@ lp_setup_fence( struct setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); - set_state( setup, SETUP_ACTIVE ); + set_scene_state( setup, SETUP_ACTIVE ); /* insert the fence into all command bins */ lp_scene_bin_everywhere( scene, @@ -358,13 +361,13 @@ lp_setup_set_fs_inputs( struct setup_context *setup, } void -lp_setup_set_fs( struct setup_context *setup, - struct lp_fragment_shader *fs ) +lp_setup_set_fs_function( struct setup_context *setup, + lp_jit_frag_func jit_function ) { - LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) fs); + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function); /* FIXME: reference count */ - setup->fs.current.jit_function = fs ? 
fs->current->jit_function : NULL; + setup->fs.current.jit_function = jit_function; setup->dirty |= LP_SETUP_NEW_FS; } @@ -406,6 +409,25 @@ lp_setup_set_blend_color( struct setup_context *setup, } } + +void +lp_setup_set_flatshade_first( struct setup_context *setup, + boolean flatshade_first ) +{ + setup->flatshade_first = flatshade_first; +} + + +void +lp_setup_set_vertex_info( struct setup_context *setup, + struct vertex_info *vertex_info ) +{ + /* XXX: just silently holding onto the pointer: + */ + setup->vertex_info = vertex_info; +} + + void lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture) @@ -452,8 +474,8 @@ lp_setup_is_texture_referenced( struct setup_context *setup, } -static INLINE void -lp_setup_update_shader_state( struct setup_context *setup ) +void +lp_setup_update_state( struct setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); @@ -548,36 +570,6 @@ lp_setup_update_shader_state( struct setup_context *setup ) } -/* Stubs for lines & points for now: - */ -void -lp_setup_point(struct setup_context *setup, - const float (*v0)[4]) -{ - lp_setup_update_shader_state(setup); - setup->point( setup, v0 ); -} - -void -lp_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) -{ - lp_setup_update_shader_state(setup); - setup->line( setup, v0, v1 ); -} - -void -lp_setup_tri(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]) -{ - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - - lp_setup_update_shader_state(setup); - setup->triangle( setup, v0, v1, v2 ); -} void @@ -602,11 +594,13 @@ lp_setup_destroy( struct setup_context *setup ) /** - * Create a new primitive tiling engine. Currently also creates a - * rasterizer to use with it. + * Create a new primitive tiling engine. Plug it into the backend of + * the draw module. Currently also creates a rasterizer to use with + * it. 
*/ struct setup_context * -lp_setup_create( struct pipe_screen *screen ) +lp_setup_create( struct pipe_screen *screen, + struct draw_context *draw ) { unsigned i; struct setup_context *setup = CALLOC_STRUCT(setup_context); @@ -614,6 +608,8 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup) return NULL; + lp_setup_init_vbuf(setup); + setup->empty_scenes = lp_scene_queue_create(); if (!setup->empty_scenes) goto fail; @@ -622,6 +618,13 @@ lp_setup_create( struct pipe_screen *screen ) if (!setup->rast) goto fail; + setup->vbuf = draw_vbuf_stage(draw, &setup->base); + if (!setup->vbuf) + goto fail; + + draw_set_rasterize_stage(draw, setup->vbuf); + draw_set_render(draw, &setup->base); + /* create some empty scenes */ for (i = 0; i < MAX_SCENES; i++) { struct lp_scene *scene = lp_scene_create(); @@ -637,6 +640,12 @@ lp_setup_create( struct pipe_screen *screen ) return setup; fail: + if (setup->rast) + lp_rast_destroy( setup->rast ); + + if (setup->vbuf) + ; + if (setup->empty_scenes) lp_scene_queue_destroy(setup->empty_scenes); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 5c606e86afc..a6120fcbe40 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -28,6 +28,10 @@ #define LP_SETUP_H #include "pipe/p_compiler.h" +#include "lp_jit.h" + +struct draw_context; +struct vertex_info; enum lp_interp { LP_INTERP_CONSTANT, @@ -58,7 +62,8 @@ struct lp_fragment_shader; struct lp_jit_context; struct setup_context * -lp_setup_create( struct pipe_screen *screen ); +lp_setup_create( struct pipe_screen *screen, + struct draw_context *draw ); void lp_setup_clear(struct setup_context *setup, @@ -71,22 +76,6 @@ struct pipe_fence_handle * lp_setup_fence( struct setup_context *setup ); -void -lp_setup_tri(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]); - -void -lp_setup_line(struct setup_context *setup, - const float (*v0)[4], 
- const float (*v1)[4]); - -void -lp_setup_point( struct setup_context *setup, - const float (*v0)[4] ); - - void lp_setup_flush( struct setup_context *setup, unsigned flags ); @@ -107,8 +96,8 @@ lp_setup_set_fs_inputs( struct setup_context *setup, unsigned nr ); void -lp_setup_set_fs( struct setup_context *setup, - struct lp_fragment_shader *fs ); +lp_setup_set_fs_function( struct setup_context *setup, + lp_jit_frag_func jit_function ); void lp_setup_set_fs_constants(struct setup_context *setup, @@ -131,6 +120,13 @@ boolean lp_setup_is_texture_referenced( struct setup_context *setup, const struct pipe_texture *texture ); +void +lp_setup_set_flatshade_first( struct setup_context *setup, + boolean flatshade_first ); + +void +lp_setup_set_vertex_info( struct setup_context *setup, + struct vertex_info *info ); void lp_setup_destroy( struct setup_context *setup ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index f6604a8034a..d2278a46e66 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -40,6 +40,7 @@ #include "lp_tile_soa.h" /* for TILE_SIZE */ #include "lp_scene.h" +#include "draw/draw_vbuf.h" #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 @@ -53,15 +54,31 @@ struct lp_scene_queue; * Point/line/triangle setup context. * Note: "stored" below indicates data which is stored in the bins, * not arbitrary malloc'd memory. + * + * + * Subclass of vbuf_render, plugged directly into the draw module as + * the rendering backend. */ -struct setup_context { - +struct setup_context +{ + struct vbuf_render base; + + struct vertex_info *vertex_info; + uint prim; + uint vertex_size; + uint nr_vertices; + uint vertex_buffer_size; + void *vertex_buffer; + + /* Final pipeline stage for draw module. Draw module should + * create/install this itself now. 
+ */ + struct draw_stage *vbuf; struct lp_rasterizer *rast; - - struct lp_scene *scene; /**< current scene */ struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ + boolean flatshade_first; boolean ccw_is_frontface; unsigned cullmode; @@ -120,4 +137,8 @@ void lp_setup_choose_point( struct setup_context *setup ); struct lp_scene *lp_setup_get_current_scene(struct setup_context *setup); +void lp_setup_init_vbuf(struct setup_context *setup); + +void lp_setup_update_state( struct setup_context *setup ); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c new file mode 100644 index 00000000000..5cd4f354fd6 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -0,0 +1,520 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Interface between 'draw' module's output and the llvmpipe rasterizer/setup + * code. When the 'draw' module has finished filling a vertex buffer, the + * draw_arrays() functions below will be called. Loop over the vertices and + * call the point/line/tri setup functions. + * + * Authors + * Brian Paul + */ + + +#include "lp_setup_context.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + + +#define LP_MAX_VBUF_INDEXES 1024 +#define LP_MAX_VBUF_SIZE 4096 + + + +/** cast wrapper */ +static struct setup_context * +setup_context(struct vbuf_render *vbr) +{ + return (struct setup_context *) vbr; +} + + + +static const struct vertex_info * +lp_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct setup_context *setup = setup_context(vbr); + return setup->vertex_info; +} + + +static boolean +lp_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct setup_context *setup = setup_context(vbr); + unsigned size = vertex_size * nr_vertices; + + if (setup->vertex_buffer_size < size) { + align_free(setup->vertex_buffer); + setup->vertex_buffer = align_malloc(size, 16); + setup->vertex_buffer_size = size; + } + + setup->vertex_size = vertex_size; + setup->nr_vertices = nr_vertices; + + return setup->vertex_buffer != NULL; +} + +static void +lp_vbuf_release_vertices(struct vbuf_render *vbr) +{ + /* keep the old allocation for next time */ +} + +static void * +lp_vbuf_map_vertices(struct vbuf_render *vbr) +{ + struct setup_context *setup = setup_context(vbr); + return 
setup->vertex_buffer; +} + +static void +lp_vbuf_unmap_vertices(struct vbuf_render *vbr, + ushort min_index, + ushort max_index ) +{ + struct setup_context *setup = setup_context(vbr); + assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size ); + /* do nothing */ +} + + +static boolean +lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + setup_context(vbr)->prim = prim; + return TRUE; +} + +typedef const float (*const_float4_ptr)[4]; + +static INLINE const_float4_ptr get_vert( const void *vertex_buffer, + int index, + int stride ) +{ + return (const_float4_ptr)((char *)vertex_buffer + index * stride); +} + +/** + * draw elements / indexed primitives + */ +static void +lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) +{ + struct setup_context *setup = setup_context(vbr); + const unsigned stride = setup->vertex_info->size * sizeof(float); + const void *vertex_buffer = setup->vertex_buffer; + unsigned i; + + lp_setup_update_state(setup); + + switch (setup->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup->point( setup, + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup->line( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + if (nr) { + setup->line( setup, + get_vert(vertex_buffer, indices[nr-1], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + break; + + case PIPE_PRIM_TRIANGLES: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 3) { + 
setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-2], stride) ); + } + } + else { + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i+(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-(i&1)], stride), + get_vert(vertex_buffer, indices[i-2], stride) ); + } + } + else { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i+(i&1)-2], stride), + get_vert(vertex_buffer, indices[i-(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride) ); + } + } + else { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_QUADS: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-3], stride) ); + } + } + else { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + 
get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride)); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-3], stride) ); + } + } + else { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_POLYGON: + /* Almost same as tri fan but the _first_ vertex specifies the flat + * shading color. Note that the first polygon vertex is passed as + * the last triangle vertex here. + * flatshade_first state makes no difference. + */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + break; + + default: + assert(0); + } +} + + +/** + * This function is hit when the draw module is working in pass-through mode. + * It's up to us to convert the vertex array into point/line/tri prims. 
+ */ +static void +lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +{ + struct setup_context *setup = setup_context(vbr); + const unsigned stride = setup->vertex_info->size * sizeof(float); + const void *vertex_buffer = + (void *) get_vert(setup->vertex_buffer, start, stride); + unsigned i; + + lp_setup_update_state(setup); + + switch (setup->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup->point( setup, + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + if (nr) { + setup->line( setup, + get_vert(vertex_buffer, nr-1, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + case PIPE_PRIM_TRIANGLES: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-2, stride) ); + } + } + else { + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (setup->flatshade_first) { + for (i = 2; i < nr; i++) { + setup->triangle( setup, + get_vert(vertex_buffer, i+(i&1)-1, stride), + get_vert(vertex_buffer, i-(i&1), stride), + get_vert(vertex_buffer, i-2, stride) ); + } + } + else { + for (i = 2; i < nr; i++) { + setup->triangle( setup, + get_vert(vertex_buffer, i+(i&1)-2, stride), + 
get_vert(vertex_buffer, i-(i&1)-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride) ); + } + } + else { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_QUADS: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride) ); + } + } + else { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride) ); + setup->triangle( setup, + + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride) ); + } + } + else { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, 
stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_POLYGON: + /* Almost same as tri fan but the _first_ vertex specifies the flat + * shading color. Note that the first polygon vertex is passed as + * the last triangle vertex here. + * flatshade_first state makes no difference. + */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + default: + assert(0); + } +} + + + +static void +lp_vbuf_destroy(struct vbuf_render *vbr) +{ + lp_setup_destroy(setup_context(vbr)); +} + + +/** + * Create the post-transform vertex handler for the given context. + */ +void +lp_setup_init_vbuf(struct setup_context *setup) +{ + setup->base.max_indices = LP_MAX_VBUF_INDEXES; + setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; + + setup->base.get_vertex_info = lp_vbuf_get_vertex_info; + setup->base.allocate_vertices = lp_vbuf_allocate_vertices; + setup->base.map_vertices = lp_vbuf_map_vertices; + setup->base.unmap_vertices = lp_vbuf_unmap_vertices; + setup->base.set_primitive = lp_vbuf_set_primitive; + setup->base.draw = lp_vbuf_draw; + setup->base.draw_arrays = lp_vbuf_draw_arrays; + setup->base.release_vertices = lp_vbuf_release_vertices; + setup->base.destroy = lp_vbuf_destroy; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index a18efcc0e0f..ab827045ed6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -37,17 +37,6 @@ #include "lp_state.h" -/** - * Mark the current vertex layout as "invalid". - * We'll validate the vertex layout later, when we start to actually - * render a point or line or tri. 
- */ -static void -invalidate_vertex_layout(struct llvmpipe_context *llvmpipe) -{ - llvmpipe->vertex_info.num_attribs = 0; -} - /** * The vertex info describes how to convert the post-transformed vertices @@ -57,150 +46,95 @@ invalidate_vertex_layout(struct llvmpipe_context *llvmpipe) * This function validates the vertex layout and returns a pointer to a * vertex_info object. */ -struct vertex_info * -llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) +static void +compute_vertex_info(struct llvmpipe_context *llvmpipe) { - struct vertex_info *vinfo = &llvmpipe->vertex_info; - - if (vinfo->num_attribs == 0) { - /* compute vertex layout now */ - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const enum interp_mode colorInterp - = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); - uint i; - - /* Tell draw_vbuf to simply emit the whole post-xform vertex - * as-is. No longer any need to try and emit draw vertex_header - * info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + const struct lp_fragment_shader *lpfs = llvmpipe->fs; + struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(llvmpipe->draw); + uint i; + + /* Tell draw_vbuf to simply emit the whole post-xform vertex as-is. + * + * Not really sure if this is the best approach. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); + } + draw_compute_vertex_size(vinfo_vbuf); - /* - * Loop over fragment shader inputs, searching for the matching output - * from the vertex shader. 
- */ - vinfo->num_attribs = 0; - for (i = 0; i < lpfs->info.num_inputs; i++) { - int src; - enum interp_mode interp; - switch (lpfs->info.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - interp = INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - interp = INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - interp = INTERP_PERSPECTIVE; - break; - default: - assert(0); - interp = INTERP_LINEAR; - } + lp_setup_set_vertex_info(llvmpipe->setup, vinfo_vbuf); - switch (lpfs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - src = draw_find_vs_output(llvmpipe->draw, - TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); - break; +/* + llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); +*/ - case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_COLOR, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); - break; + /* Now match FS inputs against emitted vertex data. It's also + * entirely possible to just have a fixed layout for FS input, + * determined by the fragment shader itself, and adjust the draw + * outputs to match that. 
+ */ + { + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; - case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; + for (i = 0; i < lpfs->info.num_inputs; i++) { - case TGSI_SEMANTIC_GENERIC: + /* This can be precomputed, except for flatshade: + */ + switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_FACE: - /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); + inputs[i].interp = LP_INTERP_FACING; + break; + case TGSI_SEMANTIC_POSITION: + inputs[i].interp = LP_INTERP_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + /* Colors are linearly interpolated in the fragment shader + * even when flatshading is active. This just tells the + * setup module to use coefficients with ddx==0 and + * ddy==0. + */ + if (llvmpipe->rasterizer->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_LINEAR; break; default: - assert(0); - } - } - - llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, - TGSI_SEMANTIC_PSIZE, 0); - if (llvmpipe->psize_slot > 0) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, - llvmpipe->psize_slot); - } - - draw_compute_vertex_size(vinfo); - - { - struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; - - for (i = 0; i < lpfs->info.num_inputs; i++) { - switch (vinfo->attrib[i].interp_mode) { - case INTERP_CONSTANT: + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: inputs[i].interp = LP_INTERP_CONSTANT; break; - case INTERP_LINEAR: + case TGSI_INTERPOLATE_LINEAR: inputs[i].interp = LP_INTERP_LINEAR; break; - case INTERP_PERSPECTIVE: + case TGSI_INTERPOLATE_PERSPECTIVE: inputs[i].interp = LP_INTERP_PERSPECTIVE; break; - case INTERP_POS: - inputs[i].interp = LP_INTERP_POSITION; - break; default: assert(0); + 
break; } - - if (lpfs->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) - inputs[i].interp = LP_INTERP_FACING; - - inputs[i].src_index = vinfo->attrib[i].src_index; } - lp_setup_set_fs_inputs(llvmpipe->setup, inputs, lpfs->info.num_inputs); + /* Search for each input in current vs output: + */ + inputs[i].src_index = + draw_find_vs_output(llvmpipe->draw, + lpfs->info.input_semantic_name[i], + lpfs->info.input_semantic_index[i]); } - } - return vinfo; + lp_setup_set_fs_inputs(llvmpipe->setup, + inputs, + lpfs->info.num_inputs); + } } -/** - * Called from vbuf module. - * - * Note that there's actually two different vertex layouts in llvmpipe. - * - * The normal one is computed in llvmpipe_get_vertex_info() above and is - * used by the point/line/tri "setup" code. - * - * The other one (this one) is only used by the vbuf module (which is - * not normally used by default but used in testing). For the vbuf module, - * we basically want to pass-through the draw module's vertex layout as-is. - * When the llvmpipe vbuf code begins drawing, the normal vertex layout - * will come into play again. - */ -struct vertex_info * -llvmpipe_get_vbuf_vertex_info(struct llvmpipe_context *llvmpipe) -{ - (void) llvmpipe_get_vertex_info(llvmpipe); - return &llvmpipe->vertex_info_vbuf; -} - /** * Recompute cliprect from scissor bounds, scissor enable and surface size. 
@@ -273,7 +207,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | LP_NEW_VS)) - invalidate_vertex_layout( llvmpipe ); + compute_vertex_info( llvmpipe ); if (llvmpipe->dirty & (LP_NEW_SCISSOR | LP_NEW_RASTERIZER | @@ -287,36 +221,23 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); - if (llvmpipe->dirty & (LP_NEW_BLEND | - LP_NEW_DEPTH_STENCIL_ALPHA | - LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) - llvmpipe_update_fs( llvmpipe ); - if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) - lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); + lp_setup_set_blend_color(llvmpipe->setup, + &llvmpipe->blend_color); if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) - lp_setup_set_alpha_ref_value(llvmpipe->setup, llvmpipe->depth_stencil->alpha.ref_value); + lp_setup_set_alpha_ref_value(llvmpipe->setup, + llvmpipe->depth_stencil->alpha.ref_value); if (llvmpipe->dirty & LP_NEW_CONSTANTS) - lp_setup_set_fs_constants(llvmpipe->setup, llvmpipe->constants[PIPE_SHADER_FRAGMENT].buffer); + lp_setup_set_fs_constants(llvmpipe->setup, + llvmpipe->constants[PIPE_SHADER_FRAGMENT].buffer); if (llvmpipe->dirty & LP_NEW_TEXTURE) - lp_setup_set_sampler_textures(llvmpipe->setup, llvmpipe->num_textures, llvmpipe->texture); + lp_setup_set_sampler_textures(llvmpipe->setup, + llvmpipe->num_textures, + llvmpipe->texture); llvmpipe->dirty = 0; } - -#if 0 -void llvmpipe_prepare(struct lp_setup_context *setup) -{ - struct llvmpipe_context *lp = setup->llvmpipe; - - if (lp->dirty) { - llvmpipe_update_derived(lp); - } - -} -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 7ed727dbbce..3ad58415e39 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -891,5 +891,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) shader->current = variant; - 
lp_setup_set_fs(lp->setup, shader); + lp_setup_set_fs_function(lp->setup, + shader->current->jit_function); } -- cgit v1.2.3 From 72120292b981fd96e1127f927d7257255c65befd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 16:56:28 +0000 Subject: llvmpipe: restrict header visibility --- src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index d2a6ae21f57..d59d7681398 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -47,7 +47,7 @@ #include "lp_bld_intr.h" #include "lp_bld_sample.h" #include "lp_bld_tgsi.h" -#include "lp_state.h" +#include "lp_jit.h" #include "lp_tex_sample.h" -- cgit v1.2.3 From c9240c4c8f67a06403b29992ab96b9a48f68b01d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 17:00:26 +0000 Subject: llvmpipe: remove dead code --- src/gallium/drivers/llvmpipe/lp_clear.c | 1 - src/gallium/drivers/llvmpipe/lp_context.h | 1 - src/gallium/drivers/llvmpipe/lp_flush.c | 1 - src/gallium/drivers/llvmpipe/lp_rast.c | 1 - src/gallium/drivers/llvmpipe/lp_state.h | 7 ------- src/gallium/drivers/llvmpipe/lp_state_derived.c | 16 ++++++++-------- 6 files changed, 8 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 4bae44e2ea2..3e8c4109251 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -36,7 +36,6 @@ #include "lp_clear.h" #include "lp_context.h" #include "lp_setup.h" -#include "lp_state.h" /** diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index b796148457e..194692045dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -88,7 
+88,6 @@ struct llvmpipe_context { /** Vertex format */ struct vertex_info vertex_info; - struct vertex_info vertex_info_vbuf; /** Which vertex shader output slot contains point size */ int psize_slot; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index e6519cb216b..9405150c4f7 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -35,7 +35,6 @@ #include "lp_flush.h" #include "lp_context.h" #include "lp_surface.h" -#include "lp_state.h" #include "lp_winsys.h" #include "lp_setup.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 24393c8e891..6772ff332ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -34,7 +34,6 @@ #include "lp_scene_queue.h" #include "lp_debug.h" #include "lp_fence.h" -#include "lp_state.h" #include "lp_rast.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 64fe3600f5e..6017dc553a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -215,11 +215,4 @@ void llvmpipe_unmap_texture_surfaces(struct llvmpipe_context *lp); -struct vertex_info * -llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe); - -struct vertex_info * -llvmpipe_get_vbuf_vertex_info(struct llvmpipe_context *llvmpipe); - - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index ab827045ed6..cc7b09fd4d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -43,29 +43,29 @@ * (simple float[][4]) used by the 'draw' module into vertices for * rasterization. * - * This function validates the vertex layout and returns a pointer to a - * vertex_info object. + * This function validates the vertex layout. 
*/ static void compute_vertex_info(struct llvmpipe_context *llvmpipe) { const struct lp_fragment_shader *lpfs = llvmpipe->fs; - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + struct vertex_info *vinfo = &llvmpipe->vertex_info; const uint num = draw_num_vs_outputs(llvmpipe->draw); uint i; - /* Tell draw_vbuf to simply emit the whole post-xform vertex as-is. + /* Tell setup to tell the draw module to simply emit the whole + * post-xform vertex as-is. * * Not really sure if this is the best approach. */ - vinfo_vbuf->num_attribs = 0; + vinfo->num_attribs = 0; for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, i); } - draw_compute_vertex_size(vinfo_vbuf); + draw_compute_vertex_size(vinfo); - lp_setup_set_vertex_info(llvmpipe->setup, vinfo_vbuf); + lp_setup_set_vertex_info(llvmpipe->setup, vinfo); /* llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, -- cgit v1.2.3 From 601969c58729e26db6a33645a6a9ddb0b6ea2b92 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 17:13:28 +0000 Subject: llvmpipe: rename some functions --- src/gallium/drivers/llvmpipe/lp_setup_vbuf.c | 36 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 5cd4f354fd6..42c30af5bac 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -59,7 +59,7 @@ setup_context(struct vbuf_render *vbr) static const struct vertex_info * -lp_vbuf_get_vertex_info(struct vbuf_render *vbr) +lp_setup_get_vertex_info(struct vbuf_render *vbr) { struct setup_context *setup = setup_context(vbr); return setup->vertex_info; @@ -67,7 +67,7 @@ lp_vbuf_get_vertex_info(struct vbuf_render *vbr) static boolean -lp_vbuf_allocate_vertices(struct vbuf_render *vbr, 
+lp_setup_allocate_vertices(struct vbuf_render *vbr, ushort vertex_size, ushort nr_vertices) { struct setup_context *setup = setup_context(vbr); @@ -86,20 +86,20 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr, } static void -lp_vbuf_release_vertices(struct vbuf_render *vbr) +lp_setup_release_vertices(struct vbuf_render *vbr) { /* keep the old allocation for next time */ } static void * -lp_vbuf_map_vertices(struct vbuf_render *vbr) +lp_setup_map_vertices(struct vbuf_render *vbr) { struct setup_context *setup = setup_context(vbr); return setup->vertex_buffer; } static void -lp_vbuf_unmap_vertices(struct vbuf_render *vbr, +lp_setup_unmap_vertices(struct vbuf_render *vbr, ushort min_index, ushort max_index ) { @@ -110,7 +110,7 @@ lp_vbuf_unmap_vertices(struct vbuf_render *vbr, static boolean -lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim) { setup_context(vbr)->prim = prim; return TRUE; @@ -129,7 +129,7 @@ static INLINE const_float4_ptr get_vert( const void *vertex_buffer, * draw elements / indexed primitives */ static void -lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) +lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) { struct setup_context *setup = setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); @@ -312,7 +312,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) * It's up to us to convert the vertex array into point/line/tri prims. 
*/ static void -lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct setup_context *setup = setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); @@ -493,7 +493,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void -lp_vbuf_destroy(struct vbuf_render *vbr) +lp_setup_vbuf_destroy(struct vbuf_render *vbr) { lp_setup_destroy(setup_context(vbr)); } @@ -508,13 +508,13 @@ lp_setup_init_vbuf(struct setup_context *setup) setup->base.max_indices = LP_MAX_VBUF_INDEXES; setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - setup->base.get_vertex_info = lp_vbuf_get_vertex_info; - setup->base.allocate_vertices = lp_vbuf_allocate_vertices; - setup->base.map_vertices = lp_vbuf_map_vertices; - setup->base.unmap_vertices = lp_vbuf_unmap_vertices; - setup->base.set_primitive = lp_vbuf_set_primitive; - setup->base.draw = lp_vbuf_draw; - setup->base.draw_arrays = lp_vbuf_draw_arrays; - setup->base.release_vertices = lp_vbuf_release_vertices; - setup->base.destroy = lp_vbuf_destroy; + setup->base.get_vertex_info = lp_setup_get_vertex_info; + setup->base.allocate_vertices = lp_setup_allocate_vertices; + setup->base.map_vertices = lp_setup_map_vertices; + setup->base.unmap_vertices = lp_setup_unmap_vertices; + setup->base.set_primitive = lp_setup_set_primitive; + setup->base.draw = lp_setup_draw; + setup->base.draw_arrays = lp_setup_draw_arrays; + setup->base.release_vertices = lp_setup_release_vertices; + setup->base.destroy = lp_setup_vbuf_destroy; } -- cgit v1.2.3 From b08583da468ee186b43ea678f8d33fb7df3ab372 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 17:13:37 +0000 Subject: llvmpipe: fix double free --- src/gallium/drivers/llvmpipe/lp_context.c | 5 ++--- src/gallium/drivers/llvmpipe/lp_setup.c | 3 ++- src/gallium/drivers/llvmpipe/lp_setup.h | 2 -- src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 ++ 4 
files changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 0457ccc8a94..696a9d5f6a8 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -53,12 +53,11 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); uint i; + /* This will also destroy llvmpipe->setup: + */ if (llvmpipe->draw) draw_destroy( llvmpipe->draw ); - if (llvmpipe->setup) - lp_setup_destroy( llvmpipe->setup ); - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index e2b21aed473..1eb944a0de7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -571,7 +571,8 @@ lp_setup_update_state( struct setup_context *setup ) - +/* Only caller is lp_setup_vbuf_destroy() + */ void lp_setup_destroy( struct setup_context *setup ) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index a6120fcbe40..bf12cb85271 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -128,7 +128,5 @@ void lp_setup_set_vertex_info( struct setup_context *setup, struct vertex_info *info ); -void -lp_setup_destroy( struct setup_context *setup ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index d2278a46e66..a1808fcd4c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -141,4 +141,6 @@ void lp_setup_init_vbuf(struct setup_context *setup); void lp_setup_update_state( struct setup_context *setup ); +void lp_setup_destroy( struct setup_context *setup ); + #endif -- cgit v1.2.3 From 
4e8d67af574af480fdcca79e23836464c86b2dee Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 4 Jan 2010 15:22:58 -0700 Subject: llvmpipe: flow-control comments --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index fe9c6941f74..22b4310f061 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -145,6 +145,10 @@ lp_build_flow_destroy(struct lp_build_flow_context *flow) } +/** + * Begin/push a new flow control construct, such as a loop, skip block + * or variable scope. + */ static union lp_build_flow_construct_data * lp_build_flow_push(struct lp_build_flow_context *flow, enum lp_build_flow_construct_kind kind) @@ -158,6 +162,10 @@ lp_build_flow_push(struct lp_build_flow_context *flow, } +/** + * Return the current/top flow control construct on the stack. + * \param kind the expected type of the top-most construct + */ static union lp_build_flow_construct_data * lp_build_flow_peek(struct lp_build_flow_context *flow, enum lp_build_flow_construct_kind kind) @@ -174,6 +182,10 @@ lp_build_flow_peek(struct lp_build_flow_context *flow, } +/** + * End/pop the current/top flow control construct on the stack. + * \param kind the expected type of the top-most construct + */ static union lp_build_flow_construct_data * lp_build_flow_pop(struct lp_build_flow_context *flow, enum lp_build_flow_construct_kind kind) @@ -213,11 +225,11 @@ lp_build_flow_scope_begin(struct lp_build_flow_context *flow) * * A variable is a named entity which can have different LLVMValueRef's at * different points of the program. 
This is relevant for control flow because - * when there are mutiple branches to a same location we need to replace + * when there are multiple branches to a same location we need to replace * the variable's value with a Phi function as explained in * http://en.wikipedia.org/wiki/Static_single_assignment_form . * - * We keep track of variables by keeping around a pointer to where their + * We keep track of variables by keeping around a pointer to where they're * current. * * There are a few cautions to observe: @@ -386,6 +398,9 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) } +/** + * Check if the mask predicate is zero. If so, jump to the end of the block. + */ static void lp_build_mask_check(struct lp_build_mask_context *mask) { -- cgit v1.2.3 From db7f9b053b7982810a00bc4d944bb3dfa2b9aac9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Jan 2010 14:11:54 -0700 Subject: llvmpipe: more comments in flow builder code --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 22b4310f061..e42b653b677 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -310,6 +310,11 @@ lp_build_flow_insert_block(struct lp_build_flow_context *flow) return new_block; } + +/** + * Begin a "skip" block. Inside this block we can test a condition and + * skip to the end of the block if the condition is false. 
+ */ void lp_build_flow_skip_begin(struct lp_build_flow_context *flow) { @@ -321,13 +326,16 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) if(!skip) return; + /* create new basic block */ skip->block = lp_build_flow_insert_block(flow); + skip->num_variables = flow->num_variables; if(!skip->num_variables) { skip->phi = NULL; return; } + /* Allocate a Phi node for each variable in this skip scope */ skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); if(!skip->phi) { skip->num_variables = 0; @@ -337,6 +345,7 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, skip->block); + /* create a Phi node for each variable */ for(i = 0; i < skip->num_variables; ++i) skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); @@ -344,6 +353,10 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) } +/** + * Insert code to test a condition and branch to the end of the current + * skip block if the condition is true. 
+ */ void lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, LLVMValueRef cond) @@ -361,15 +374,17 @@ lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, new_block = lp_build_flow_insert_block(flow); + /* for each variable, update the Phi node with a (variable, block) pair */ for(i = 0; i < skip->num_variables; ++i) { assert(*flow->variables[i]); LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1); } + /* if cond is true, goto skip->block, else goto new_block */ LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); LLVMPositionBuilderAtEnd(flow->builder, new_block); - } +} void @@ -385,12 +400,14 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) current_block = LLVMGetInsertBlock(flow->builder); + /* add (variable, block) tuples to the phi nodes */ for(i = 0; i < skip->num_variables; ++i) { assert(*flow->variables[i]); LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1); *flow->variables[i] = skip->phi[i]; } + /* goto block */ LLVMBuildBr(flow->builder, skip->block); LLVMPositionBuilderAtEnd(flow->builder, skip->block); @@ -407,12 +424,14 @@ lp_build_mask_check(struct lp_build_mask_context *mask) LLVMBuilderRef builder = mask->flow->builder; LLVMValueRef cond; + /* cond = (mask == 0) */ cond = LLVMBuildICmp(builder, LLVMIntEQ, LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), LLVMConstNull(mask->reg_type), ""); + /* if cond, goto end of block */ lp_build_flow_skip_cond_break(mask->flow, cond); } -- cgit v1.2.3 From baeb3a23513b9045c1a50bbe21124a4f8a9b6cd6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Jan 2010 17:53:12 -0700 Subject: llvmpipe: checkpoint commit of new if/else/endif flow control Totally untested at this point. More work to do. 
--- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 243 ++++++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_bld_flow.h | 27 ++++ 2 files changed, 269 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index e42b653b677..230edc6a5cf 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -41,6 +41,8 @@ #define LP_BUILD_FLOW_MAX_VARIABLES 32 #define LP_BUILD_FLOW_MAX_DEPTH 32 +#define LP_BUILD_IF_MAX_VARIABLES 8 + /** * Enumeration of all possible flow constructs. @@ -48,6 +50,7 @@ enum lp_build_flow_construct_kind { LP_BUILD_FLOW_SCOPE, LP_BUILD_FLOW_SKIP, + LP_BUILD_FLOW_IF }; @@ -73,7 +76,24 @@ struct lp_build_flow_skip /** Number of variables declared at the beginning */ unsigned num_variables; - LLVMValueRef *phi; + LLVMValueRef *phi; /**< array [num_variables] */ +}; + + +/** + * if/else/endif. + */ +struct lp_build_flow_if +{ + unsigned num_variables; + + /** phi variables in the true clause */ + LLVMValueRef true_variables[LP_BUILD_IF_MAX_VARIABLES]; + unsigned num_true_variables; + + /** phi variables in the false clause */ + LLVMValueRef false_variables[LP_BUILD_IF_MAX_VARIABLES]; + unsigned num_false_variables; }; @@ -84,6 +104,7 @@ union lp_build_flow_construct_data { struct lp_build_flow_scope scope; struct lp_build_flow_skip skip; + struct lp_build_flow_if ifthen; }; @@ -540,3 +561,223 @@ lp_build_loop_end(LLVMBuilderRef builder, LLVMPositionBuilderAtEnd(builder, after_block); } + + +/* + Example of if/then/else building: + + int x; + if (cond) { + x = 1 + 2; + } + else { + x = 2 + 3; + } + + Is built with: + + flow = lp_build_flow_create(builder); + ... + + lp_build_flow_scope_declare(flow, "x"); + + lp_build_if(ctx, flow, builder, cond); + x = LLVMAdd(1, 2); + lp_build_if_phi_var(ctx, "x"); + lp_build_else(ctx); + x = LLVMAdd(2, 3); + lp_build_if_phi_var(ctx, "x"); + lp_build_endif(ctx); + + ... 
+ + flow = lp_build_flow_end(flow); + */ + + + +/** + * Begin an if/else/endif construct. + */ +void +lp_build_if(struct lp_build_if_state *ctx, + struct lp_build_flow_context *flow, + LLVMBuilderRef builder, + LLVMValueRef condition) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + struct lp_build_flow_if *ifthen; + + memset(ctx, 0, sizeof(*ctx)); + ctx->builder = builder; + ctx->flow = flow; + ctx->condition = condition; + ctx->entry_block = block; + + /* push/create new scope */ + ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + ifthen->num_variables = flow->num_variables; + ifthen->num_true_variables = 0; + ifthen->num_false_variables = 0; + + /* allocate the block for the if/true clause */ + ctx->true_block = LLVMAppendBasicBlock(function, "true block"); + /* XXX is this correct ??? */ + LLVMPositionBuilderAtEnd(builder, ctx->true_block); +} + + +/** + * Begin else-part of a conditional + */ +void +lp_build_else(struct lp_build_if_state *ctx) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + struct lp_build_flow_if *ifthen; + + ifthen = &lp_build_flow_peek(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + /* allocate the block for the else/false clause */ + ctx->false_block = LLVMAppendBasicBlock(function, "false block"); + /* XXX is this correct ??? */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); +} + + +/** + * End a conditional. + * This involves building a "merge" block at the endif which + * contains the phi instructions. 
+ */ +void +lp_build_endif(struct lp_build_if_state *ctx) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + LLVMBasicBlockRef merge_block = LLVMAppendBasicBlock(function, "endif block"); + LLVMValueRef phi[LP_BUILD_FLOW_MAX_VARIABLES]; + struct lp_build_flow_if *ifthen; + unsigned i; + + /* build the endif/merge block now */ + /* XXX this is probably wrong */ + LLVMPositionBuilderAtEnd(ctx->builder, merge_block); + + ifthen = &lp_build_flow_pop(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + memset(phi, 0, sizeof(phi)); + + /* build phi nodes for any variables which were declared inside if part */ + + for (i = 0; i < ifthen->num_variables; i++) { + LLVMValueRef *var = ctx->flow->variables[i]; + const char *name = LLVMGetValueName(*var); + unsigned j; + + /* search true-clause variables list for 'name' */ + for (j = 0; j < ifthen->num_true_variables; j++) { + LLVMValueRef v = ifthen->true_variables[j]; + if (strcmp(LLVMGetValueName(v), name) == 0) { + /* add phi */ + if (!phi[i]) + phi[i] = LLVMBuildPhi(ctx->builder, LLVMTypeOf(*var), ""); + LLVMAddIncoming(phi[i], &v, &ctx->true_block, 1); + } + } + + /* search false-clause variables list for 'name' */ + for (j = 0; j < ifthen->num_false_variables; j++) { + LLVMValueRef v = ifthen->false_variables[j]; + if (strcmp(LLVMGetValueName(v), name) == 0) { + /* add phi */ + if (!phi[i]) + phi[i] = LLVMBuildPhi(ctx->builder, LLVMTypeOf(*var), ""); + LLVMAddIncoming(phi[i], &v, &ctx->false_block, 1); + } + } + + /* "return" new phi variable to calling code */ + if (phi[i]) + *var = phi[i]; + } + + /*** + *** Insert the various branch instructions here. + *** XXX need to verify all the builder/block positioning is correct. 
+ ***/ + + /* Insert the conditional branch instruction at the end of entry_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->entry_block); + + if (ctx->false_block) { + /* we have an else clause */ + LLVMBuildCondBr(ctx->builder, ctx->condition, + ctx->true_block, ctx->false_block); + } + else { + /* no else clause */ + LLVMBuildCondBr(ctx->builder, ctx->condition, + ctx->true_block, merge_block); + } + + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->true_block); + LLVMBuildBr(ctx->builder, merge_block); + if (ctx->false_block) { + /* Append an unconditional Br(anch) instruction on the false_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); + LLVMBuildBr(ctx->builder, merge_block); + } + + + /* Finish-up: continue building at end of the merge_block */ + /* XXX is this right? */ + LLVMPositionBuilderAtEnd(ctx->builder, merge_block); +} + + +/** + * Declare a variable that needs to be merged with another variable + * via a phi function. + * This function must be called after lp_build_if() and before lp_build_endif(). 
+ */ +void +lp_build_if_phi_var(struct lp_build_if_state *ctx, LLVMValueRef var) +{ + struct lp_build_flow_if *ifthen; + const char *name; + + name = LLVMGetValueName(var); + assert(name && "variable requires a name"); + + /* make sure the var existed before the if/then/else */ + { + boolean found = FALSE; + uint i; + for (i = 0; i < ctx->flow->num_variables; i++) { + LLVMValueRef *var = ctx->flow->variables[i]; + if (strcmp(LLVMGetValueName(*var), name) == 0) { + found = TRUE; + break; + } + } + assert(found); + } + + ifthen = &lp_build_flow_pop(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + + if (ctx->false_block) { + ifthen->false_variables[ifthen->num_false_variables++] = var; + } + else { + assert(ctx->true_block); + ifthen->true_variables[ifthen->num_true_variables++] = var; + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index e61999ff06b..1f294b8a49d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -126,4 +126,31 @@ lp_build_loop_end(LLVMBuilderRef builder, + +struct lp_build_if_state +{ + LLVMBuilderRef builder; + struct lp_build_flow_context *flow; + LLVMValueRef condition; + LLVMBasicBlockRef entry_block, true_block, false_block; +}; + + +void +lp_build_if(struct lp_build_if_state *ctx, + struct lp_build_flow_context *flow, + LLVMBuilderRef builder, + LLVMValueRef condition); + +void +lp_build_if_phi_var(struct lp_build_if_state *ctx, LLVMValueRef var); + +void +lp_build_else(struct lp_build_if_state *ctx); + +void +lp_build_endif(struct lp_build_if_state *ctx); + + + #endif /* !LP_BLD_FLOW_H */ -- cgit v1.2.3 From 70b8d59792a814a5a81b86d57016314754d91593 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 11:01:00 -0700 Subject: llvmpipe: checkpoint if/else/endif contructs work The LLVM IR looks correct now. Basic blocks are where they're supposed to be and the Phi functions have the right (var,block) information. 
--- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 220 +++++++++++++---------------- src/gallium/drivers/llvmpipe/lp_bld_flow.h | 5 +- 2 files changed, 96 insertions(+), 129 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 230edc6a5cf..a347cedf038 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -87,13 +87,7 @@ struct lp_build_flow_if { unsigned num_variables; - /** phi variables in the true clause */ - LLVMValueRef true_variables[LP_BUILD_IF_MAX_VARIABLES]; - unsigned num_true_variables; - - /** phi variables in the false clause */ - LLVMValueRef false_variables[LP_BUILD_IF_MAX_VARIABLES]; - unsigned num_false_variables; + LLVMValueRef *phi; /**< array [num_variables] */ }; @@ -310,28 +304,43 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) } +/** + * Note: this function has no dependencies on the flow code and could + * be used elsewhere. 
+ */ static LLVMBasicBlockRef -lp_build_flow_insert_block(struct lp_build_flow_context *flow) +lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) { LLVMBasicBlockRef current_block; LLVMBasicBlockRef next_block; LLVMBasicBlockRef new_block; - current_block = LLVMGetInsertBlock(flow->builder); + /* get current basic block */ + current_block = LLVMGetInsertBlock(builder); + /* check if there's another block after this one */ next_block = LLVMGetNextBasicBlock(current_block); - if(next_block) { - new_block = LLVMInsertBasicBlock(next_block, ""); + if (next_block) { + /* insert the new block before the next block */ + new_block = LLVMInsertBasicBlock(next_block, name); } else { + /* append new block after current block */ LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - new_block = LLVMAppendBasicBlock(function, ""); + new_block = LLVMAppendBasicBlock(function, name); } return new_block; } +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + return lp_build_insert_new_block(flow->builder, ""); +} + + /** * Begin a "skip" block. Inside this block we can test a condition and * skip to the end of the block if the condition is false. @@ -576,22 +585,24 @@ lp_build_loop_end(LLVMBuilderRef builder, Is built with: + LLVMValueRef x = LLVMGetUndef(); // or something else + flow = lp_build_flow_create(builder); - ... - lp_build_flow_scope_declare(flow, "x"); + lp_build_flow_scope_begin(flow); + + // x needs a phi node + lp_build_flow_scope_declare(flow, &x); - lp_build_if(ctx, flow, builder, cond); - x = LLVMAdd(1, 2); - lp_build_if_phi_var(ctx, "x"); - lp_build_else(ctx); - x = LLVMAdd(2, 3); - lp_build_if_phi_var(ctx, "x"); - lp_build_endif(ctx); + lp_build_if(ctx, flow, builder, cond); + x = LLVMAdd(1, 2); + lp_build_else(ctx); + x = LLVMAdd(2, 3); + lp_build_endif(ctx); - ... 
+ lp_build_flow_scope_end(flow); - flow = lp_build_flow_end(flow); + lp_build_flow_destroy(flow); */ @@ -606,8 +617,8 @@ lp_build_if(struct lp_build_if_state *ctx, LLVMValueRef condition) { LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); struct lp_build_flow_if *ifthen; + unsigned i; memset(ctx, 0, sizeof(*ctx)); ctx->builder = builder; @@ -620,12 +631,27 @@ lp_build_if(struct lp_build_if_state *ctx, assert(ifthen); ifthen->num_variables = flow->num_variables; - ifthen->num_true_variables = 0; - ifthen->num_false_variables = 0; - /* allocate the block for the if/true clause */ - ctx->true_block = LLVMAppendBasicBlock(function, "true block"); - /* XXX is this correct ??? */ + /* create a Phi node for each variable in this flow scope */ + ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); + if (!ifthen->phi) { + ifthen->num_variables = 0; + return; + } + + /* create endif/merge basic block for the phi functions */ + ctx->merge_block = lp_build_insert_new_block(builder, "endif-block"); + LLVMPositionBuilderAtEnd(builder, ctx->merge_block); + + /* create a phi node for each variable */ + for (i = 0; i < flow->num_variables; i++) + ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + + /* create/insert true_block before merge_block */ + ctx->true_block = LLVMInsertBasicBlock(ctx->merge_block, "if-true-block"); + + /* successive code goes into the true block */ LLVMPositionBuilderAtEnd(builder, ctx->true_block); } @@ -636,86 +662,71 @@ lp_build_if(struct lp_build_if_state *ctx, void lp_build_else(struct lp_build_if_state *ctx) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); + struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; + unsigned i; - ifthen = &lp_build_flow_peek(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + ifthen = &lp_build_flow_peek(flow, 
LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); - /* allocate the block for the else/false clause */ - ctx->false_block = LLVMAppendBasicBlock(function, "false block"); - /* XXX is this correct ??? */ + /* for each variable, update the Phi node with a (variable, block) pair */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + for (i = 0; i < flow->num_variables; i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->true_block, 1); + } + + /* create/insert false_block before the merge block */ + ctx->false_block = LLVMInsertBasicBlock(ctx->merge_block, "if-false-block"); + + /* successive code goes into the else block */ LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); } /** * End a conditional. - * This involves building a "merge" block at the endif which - * contains the phi instructions. */ void lp_build_endif(struct lp_build_if_state *ctx) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); - LLVMBasicBlockRef merge_block = LLVMAppendBasicBlock(function, "endif block"); - LLVMValueRef phi[LP_BUILD_FLOW_MAX_VARIABLES]; + struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; unsigned i; - /* build the endif/merge block now */ - /* XXX this is probably wrong */ - LLVMPositionBuilderAtEnd(ctx->builder, merge_block); - - ifthen = &lp_build_flow_pop(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; + ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); - memset(phi, 0, sizeof(phi)); - - /* build phi nodes for any variables which were declared inside if part */ + if (ctx->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + /* for each variable, update the Phi node with a (variable, block) pair */ + for (i = 0; i < flow->num_variables; i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->false_block, 1); + } + } + else { + /* no else 
clause */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + for (i = 0; i < flow->num_variables; i++) { + LLVMValueRef undef; - for (i = 0; i < ifthen->num_variables; i++) { - LLVMValueRef *var = ctx->flow->variables[i]; - const char *name = LLVMGetValueName(*var); - unsigned j; + assert(*flow->variables[i]); - /* search true-clause variables list for 'name' */ - for (j = 0; j < ifthen->num_true_variables; j++) { - LLVMValueRef v = ifthen->true_variables[j]; - if (strcmp(LLVMGetValueName(v), name) == 0) { - /* add phi */ - if (!phi[i]) - phi[i] = LLVMBuildPhi(ctx->builder, LLVMTypeOf(*var), ""); - LLVMAddIncoming(phi[i], &v, &ctx->true_block, 1); - } - } + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->true_block, 1); - /* search false-clause variables list for 'name' */ - for (j = 0; j < ifthen->num_false_variables; j++) { - LLVMValueRef v = ifthen->false_variables[j]; - if (strcmp(LLVMGetValueName(v), name) == 0) { - /* add phi */ - if (!phi[i]) - phi[i] = LLVMBuildPhi(ctx->builder, LLVMTypeOf(*var), ""); - LLVMAddIncoming(phi[i], &v, &ctx->false_block, 1); - } + /* undef value from the block preceeding the 'if' */ + undef = LLVMGetUndef(LLVMTypeOf(*flow->variables[i])); + LLVMAddIncoming(ifthen->phi[i], &undef, &ctx->entry_block, 1); } - - /* "return" new phi variable to calling code */ - if (phi[i]) - *var = phi[i]; } /*** - *** Insert the various branch instructions here. - *** XXX need to verify all the builder/block positioning is correct. + *** Now patch in the various branch instructions. 
***/ /* Insert the conditional branch instruction at the end of entry_block */ LLVMPositionBuilderAtEnd(ctx->builder, ctx->entry_block); - if (ctx->false_block) { /* we have an else clause */ LLVMBuildCondBr(ctx->builder, ctx->condition, @@ -724,60 +735,19 @@ lp_build_endif(struct lp_build_if_state *ctx) else { /* no else clause */ LLVMBuildCondBr(ctx->builder, ctx->condition, - ctx->true_block, merge_block); + ctx->true_block, ctx->merge_block); } /* Append an unconditional Br(anch) instruction on the true_block */ LLVMPositionBuilderAtEnd(ctx->builder, ctx->true_block); - LLVMBuildBr(ctx->builder, merge_block); + LLVMBuildBr(ctx->builder, ctx->merge_block); if (ctx->false_block) { /* Append an unconditional Br(anch) instruction on the false_block */ LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); - LLVMBuildBr(ctx->builder, merge_block); - } - - - /* Finish-up: continue building at end of the merge_block */ - /* XXX is this right? */ - LLVMPositionBuilderAtEnd(ctx->builder, merge_block); -} - - -/** - * Declare a variable that needs to be merged with another variable - * via a phi function. - * This function must be called after lp_build_if() and lp_build_endif(). 
- */ -void -lp_build_if_phi_var(struct lp_build_if_state *ctx, LLVMValueRef var) -{ - struct lp_build_flow_if *ifthen; - const char *name; - - name = LLVMGetValueName(var); - assert(name && "variable requires a name"); - - /* make sure the var existed before the if/then/else */ - { - boolean found = FALSE; - uint i; - for (i = 0; i < ctx->flow->num_variables; i++) { - LLVMValueRef *var = ctx->flow->variables[i]; - if (strcmp(LLVMGetValueName(*var), name) == 0) { - found = TRUE; - break; - } - } - assert(found); + LLVMBuildBr(ctx->builder, ctx->merge_block); } - ifthen = &lp_build_flow_pop(ctx->flow, LP_BUILD_FLOW_IF)->ifthen; - if (ctx->false_block) { - ifthen->false_variables[ifthen->num_false_variables++] = var; - } - else { - assert(ctx->true_block); - ifthen->true_variables[ifthen->num_true_variables++] = var; - } + /* Resume building code at end of the ctx->merge_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 1f294b8a49d..7c7cc402a38 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -132,7 +132,7 @@ struct lp_build_if_state LLVMBuilderRef builder; struct lp_build_flow_context *flow; LLVMValueRef condition; - LLVMBasicBlockRef entry_block, true_block, false_block; + LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; }; @@ -142,9 +142,6 @@ lp_build_if(struct lp_build_if_state *ctx, LLVMBuilderRef builder, LLVMValueRef condition); -void -lp_build_if_phi_var(struct lp_build_if_state *ctx, LLVMValueRef var); - void lp_build_else(struct lp_build_if_state *ctx); -- cgit v1.2.3 From af31e65b5542147a53e4d3198eb8437f89457451 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 11:20:38 -0700 Subject: llvmpipe: free the phi array --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git 
a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index a347cedf038..b7fa817e229 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -721,6 +721,8 @@ lp_build_endif(struct lp_build_if_state *ctx) } } + FREE(ifthen->phi); + /*** *** Now patch in the various branch instructions. ***/ -- cgit v1.2.3 From 855d7f51e4cfd6f4ce04bf34164676ba3bc2fc39 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 11:32:36 -0700 Subject: llvmpipe: move some fields to the private lp_build_flow_if struct --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 61 ++++++++++++++++-------------- src/gallium/drivers/llvmpipe/lp_bld_flow.h | 2 - 2 files changed, 32 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index b7fa817e229..161ec95d8c3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -88,6 +88,9 @@ struct lp_build_flow_if unsigned num_variables; LLVMValueRef *phi; /**< array [num_variables] */ + + LLVMValueRef condition; + LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; }; @@ -623,14 +626,14 @@ lp_build_if(struct lp_build_if_state *ctx, memset(ctx, 0, sizeof(*ctx)); ctx->builder = builder; ctx->flow = flow; - ctx->condition = condition; - ctx->entry_block = block; /* push/create new scope */ ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); ifthen->num_variables = flow->num_variables; + ifthen->condition = condition; + ifthen->entry_block = block; /* create a Phi node for each variable in this flow scope */ ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); @@ -640,8 +643,8 @@ lp_build_if(struct lp_build_if_state *ctx, } /* create endif/merge basic block for the phi functions */ - ctx->merge_block = lp_build_insert_new_block(builder, "endif-block"); - 
LLVMPositionBuilderAtEnd(builder, ctx->merge_block); + ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); + LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); /* create a phi node for each variable */ for (i = 0; i < flow->num_variables; i++) @@ -649,10 +652,10 @@ lp_build_if(struct lp_build_if_state *ctx, /* create/insert true_block before merge_block */ - ctx->true_block = LLVMInsertBasicBlock(ctx->merge_block, "if-true-block"); + ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); /* successive code goes into the true block */ - LLVMPositionBuilderAtEnd(builder, ctx->true_block); + LLVMPositionBuilderAtEnd(builder, ifthen->true_block); } @@ -670,17 +673,17 @@ lp_build_else(struct lp_build_if_state *ctx) assert(ifthen); /* for each variable, update the Phi node with a (variable, block) pair */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); for (i = 0; i < flow->num_variables; i++) { assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->true_block, 1); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); } /* create/insert false_block before the merge block */ - ctx->false_block = LLVMInsertBasicBlock(ctx->merge_block, "if-false-block"); + ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); /* successive code goes into the else block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); } @@ -697,27 +700,27 @@ lp_build_endif(struct lp_build_if_state *ctx) ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); - if (ctx->false_block) { - LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + if (ifthen->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); /* for each variable, update the Phi node with a (variable, block) 
pair */ for (i = 0; i < flow->num_variables; i++) { assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->false_block, 1); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); } } else { /* no else clause */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); for (i = 0; i < flow->num_variables; i++) { LLVMValueRef undef; assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ctx->true_block, 1); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); /* undef value from the block preceeding the 'if' */ undef = LLVMGetUndef(LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(ifthen->phi[i], &undef, &ctx->entry_block, 1); + LLVMAddIncoming(ifthen->phi[i], &undef, &ifthen->entry_block, 1); } } @@ -728,28 +731,28 @@ lp_build_endif(struct lp_build_if_state *ctx) ***/ /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->entry_block); - if (ctx->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block); + if (ifthen->false_block) { /* we have an else clause */ - LLVMBuildCondBr(ctx->builder, ctx->condition, - ctx->true_block, ctx->false_block); + LLVMBuildCondBr(ctx->builder, ifthen->condition, + ifthen->true_block, ifthen->false_block); } else { /* no else clause */ - LLVMBuildCondBr(ctx->builder, ctx->condition, - ctx->true_block, ctx->merge_block); + LLVMBuildCondBr(ctx->builder, ifthen->condition, + ifthen->true_block, ifthen->merge_block); } /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->true_block); - LLVMBuildBr(ctx->builder, ctx->merge_block); - if (ctx->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); + LLVMBuildBr(ctx->builder, ifthen->merge_block); + if (ifthen->false_block) { /* Append an 
unconditional Br(anch) instruction on the false_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->false_block); - LLVMBuildBr(ctx->builder, ctx->merge_block); + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + LLVMBuildBr(ctx->builder, ifthen->merge_block); } - /* Resume building code at end of the ctx->merge_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ctx->merge_block); + /* Resume building code at end of the ifthen->merge_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 7c7cc402a38..4c225a0d4f9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -131,8 +131,6 @@ struct lp_build_if_state { LLVMBuilderRef builder; struct lp_build_flow_context *flow; - LLVMValueRef condition; - LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; }; -- cgit v1.2.3 From 5208af7853989c30bea6ce8c4ac659a2f2304225 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 12:47:30 -0700 Subject: llvmpipe: fix more if/else/endif design bugs --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 161ec95d8c3..693742ff859 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -647,9 +647,12 @@ lp_build_if(struct lp_build_if_state *ctx, LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); /* create a phi node for each variable */ - for (i = 0; i < flow->num_variables; i++) + for (i = 0; i < flow->num_variables; i++) { ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + /* add add the initial value of the var from the entry block */ + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], 
&ifthen->entry_block, 1); + } /* create/insert true_block before merge_block */ ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); @@ -706,21 +709,20 @@ lp_build_endif(struct lp_build_if_state *ctx) for (i = 0; i < flow->num_variables; i++) { assert(*flow->variables[i]); LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); + + /* replace the variable ref with the phi function */ + *flow->variables[i] = ifthen->phi[i]; } } else { /* no else clause */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); for (i = 0; i < flow->num_variables; i++) { - LLVMValueRef undef; - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - /* undef value from the block preceeding the 'if' */ - undef = LLVMGetUndef(LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(ifthen->phi[i], &undef, &ifthen->entry_block, 1); + /* replace the variable ref with the phi function */ + *flow->variables[i] = ifthen->phi[i]; } } -- cgit v1.2.3 From f4321fbd961a0a891c7f40b16efc61aa791e03a9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 8 Jan 2010 14:49:34 -0700 Subject: llvmpipe: optimize case when all four pixels are inside the triangle When the incoming c0,c1,c2 values are equal to INT_MIN it means that all pixels are inside the triangle. Thus we can skip the detailed pixel inside/outside triangle tests. Use the new lp_build_if()/endif() functions to generate the branching code. The code is disabled ATM however because it's actually a little slower than the original code. A little more tuning may fix that though... 
--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 106 +++++++++++++++++++++-------- 1 file changed, 77 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 3a669ba859a..293535387ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -58,6 +58,7 @@ * @author Jose Fonseca */ +#include <limits.h> #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_format.h" @@ -212,14 +213,16 @@ generate_tri_edge_mask(LLVMBuilderRef builder, m2_vec = step2_ptr[i] > c2_vec mask = m0_vec & m1_vec & m2_vec */ + struct lp_build_flow_context *flow; + struct lp_build_if_state ifctx; struct lp_type i32_type; - LLVMTypeRef i32vec4_type; + LLVMTypeRef i32vec4_type, mask_type; - LLVMValueRef index; LLVMValueRef c0_vec, c1_vec, c2_vec; - LLVMValueRef step0_vec, step1_vec, step2_vec; - LLVMValueRef m0_vec, m1_vec, m2_vec; - LLVMValueRef m; + + LLVMValueRef int_min_vec; + LLVMValueRef not_draw_all; + LLVMValueRef in_out_mask; assert(i < 4); @@ -233,6 +236,12 @@ generate_tri_edge_mask(LLVMBuilderRef builder, i32vec4_type = lp_build_int32_vec4_type(); + mask_type = LLVMIntType(32 * 4); + + /* int_min_vec = {INT_MIN, INT_MIN, INT_MIN, INT_MIN} */ + int_min_vec = lp_build_int_const_scalar(i32_type, INT_MIN); + + /* c0_vec = {c0, c0, c0, c0} * Note that we emit this code four times but LLVM optimizes away * three instances of it.
@@ -240,34 +249,66 @@ generate_tri_edge_mask(LLVMBuilderRef builder, c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); - lp_build_name(c0_vec, "edgeconst0vec"); lp_build_name(c1_vec, "edgeconst1vec"); lp_build_name(c2_vec, "edgeconst2vec"); - index = LLVMConstInt(LLVMInt32Type(), i, 0); - step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); - step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); - step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + /* + * Use a conditional here to do detailed pixel in/out testing. + * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN} + */ + flow = lp_build_flow_create(builder); + lp_build_flow_scope_begin(flow); - lp_build_name(step0_vec, "step0vec"); - lp_build_name(step1_vec, "step1vec"); - lp_build_name(step2_vec, "step2vec"); +#define OPTIMIZE_IN_OUT_TEST 0 +#if OPTIMIZE_IN_OUT_TEST + in_out_mask = lp_build_compare(builder, i32_type, PIPE_FUNC_EQUAL, c0_vec, int_min_vec); + lp_build_name(in_out_mask, "inoutmaskvec"); - m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); - m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); - m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); + not_draw_all = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, in_out_mask, mask_type, ""), + LLVMConstNull(mask_type), + ""); - m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); - m = LLVMBuildAnd(builder, m, m2_vec, ""); + lp_build_flow_scope_declare(flow, &in_out_mask); - lp_build_name(m, "inoutmaskvec"); + lp_build_if(&ifctx, flow, builder, not_draw_all); +#endif + { + LLVMValueRef step0_vec, step1_vec, step2_vec; + LLVMValueRef m0_vec, m1_vec, m2_vec; + LLVMValueRef index, m; + + index = 
LLVMConstInt(LLVMInt32Type(), i, 0); + step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); + step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); + step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + + lp_build_name(step0_vec, "step0vec"); + lp_build_name(step1_vec, "step1vec"); + lp_build_name(step2_vec, "step2vec"); + + m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); + m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); + m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); + + m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); + in_out_mask = LLVMBuildAnd(builder, m, m2_vec, ""); + lp_build_name(in_out_mask, "inoutmaskvec"); + + /* This is the initial alive/dead pixel mask. Additional bits will get cleared + * when the Z test fails, etc. + */ + } +#if OPTIMIZE_IN_OUT_TEST + lp_build_endif(&ifctx); +#endif - *mask = m; + lp_build_flow_scope_end(flow); + lp_build_flow_destroy(flow); - /* - * if mask = {0,0,0,0} skip quad - */ + *mask = in_out_mask; } @@ -432,6 +473,8 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); flow = lp_build_flow_create(builder); + + /* we'll use this mask context to skip blending if all pixels are dead */ lp_build_mask_begin(&mask_ctx, flow, type, mask); vec_type = lp_build_vec_type(type); @@ -737,24 +780,29 @@ generate_fragment(struct llvmpipe_context *lp, LLVMDisposeBuilder(builder); - /* - * Translate the LLVM IR into machine code. - */ + /* Verify the LLVM IR. 
If invalid, dump and abort */ #ifdef DEBUG if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { - LLVMDumpValue(variant->function); - assert(0); + if (1) + LLVMDumpValue(variant->function); + abort(); } #endif - LLVMRunFunctionPassManager(screen->pass, variant->function); + /* Apply optimizations to LLVM IR */ + if (1) + LLVMRunFunctionPassManager(screen->pass, variant->function); if (LP_DEBUG & DEBUG_JIT) { + /* Print the LLVM IR to stderr */ LLVMDumpValue(variant->function); debug_printf("\n"); } + /* + * Translate the LLVM IR into machine code. + */ variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); if (LP_DEBUG & DEBUG_ASM) -- cgit v1.2.3 From c1a04416023e24621e4992caf593e8dfe8d7a2fc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 10 Jan 2010 17:22:09 +0000 Subject: llvmpipe: initial mrt support Non-mrt apps work, and the code looks correct, but not many mrt test apps handy atm... --- src/gallium/drivers/llvmpipe/lp_flush.c | 7 +- src/gallium/drivers/llvmpipe/lp_jit.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast.c | 215 +++++++++++++++++----------- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 6 +- src/gallium/drivers/llvmpipe/lp_setup.c | 18 +-- src/gallium/drivers/llvmpipe/lp_state.h | 6 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 114 +++++++++------ 7 files changed, 225 insertions(+), 143 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 9405150c4f7..16fb00092e6 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -77,8 +77,11 @@ llvmpipe_flush( struct pipe_context *pipe, if(flags & PIPE_FLUSH_FRAME) { static unsigned frame_no = 1; static char filename[256]; - util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, llvmpipe->framebuffer.cbufs[0]); + unsigned i; + for (i = 0; i < 
llvmpipe->framebuffer.nr_cbufs) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%u.bmp", i, frame_no); + debug_dump_surface_bmp(filename, llvmpipe->framebuffer.cbufs[i]); + } util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); debug_dump_surface_bmp(filename, llvmpipe->framebuffer.zsbuf); ++frame_no; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 1a6e939aa24..3b316914b02 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -108,7 +108,7 @@ typedef void const void *a0, const void *dadx, const void *dady, - void *color, + uint8_t **color, void *depth, const int32_t c1, const int32_t c2, diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6535e693089..38c27b90e35 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -53,6 +53,7 @@ lp_rast_begin( struct lp_rasterizer *rast, { struct pipe_screen *screen = rast->screen; struct pipe_surface *cbuf, *zsbuf; + int i; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -64,24 +65,27 @@ lp_rast_begin( struct lp_rasterizer *rast, rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || fb->height % TILE_SIZE != 0); - /* XXX support multiple color buffers here */ - cbuf = rast->state.fb.cbufs[0]; - if (cbuf) { - rast->cbuf_transfer = screen->get_tex_transfer(rast->screen, - cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - fb->width, fb->height); - if (!rast->cbuf_transfer) - return FALSE; - - rast->cbuf_map = screen->transfer_map(rast->screen, - rast->cbuf_transfer); - if (!rast->cbuf_map) - return FALSE; + + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + cbuf = rast->state.fb.cbufs[i]; + if (cbuf) { + rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen, + cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + 
cbuf->width, + cbuf->height); + if (!rast->cbuf_transfer[i]) + goto fail; + + rast->cbuf_map[i] = screen->transfer_map(rast->screen, + rast->cbuf_transfer[i]); + if (!rast->cbuf_map[i]) + goto fail; + } } zsbuf = rast->state.fb.zsbuf; @@ -93,17 +97,23 @@ lp_rast_begin( struct lp_rasterizer *rast, zsbuf->zslice, PIPE_TRANSFER_READ_WRITE, 0, 0, - fb->width, fb->height); + zsbuf->width, + zsbuf->height); if (!rast->zsbuf_transfer) - return FALSE; + goto fail; rast->zsbuf_map = screen->transfer_map(rast->screen, rast->zsbuf_transfer); if (!rast->zsbuf_map) - return FALSE; + goto fail; } return TRUE; + +fail: + /* Unmap and release transfers? + */ + return FALSE; } @@ -115,22 +125,26 @@ static void lp_rast_end( struct lp_rasterizer *rast ) { struct pipe_screen *screen = rast->screen; + unsigned i; - if (rast->cbuf_map) - screen->transfer_unmap(screen, rast->cbuf_transfer); + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + if (rast->cbuf_map[i]) + screen->transfer_unmap(screen, rast->cbuf_transfer[i]); + + if (rast->cbuf_transfer[i]) + screen->tex_transfer_destroy(rast->cbuf_transfer[i]); + + rast->cbuf_transfer[i] = NULL; + rast->cbuf_map[i] = NULL; + } if (rast->zsbuf_map) screen->transfer_unmap(screen, rast->zsbuf_transfer); - if (rast->cbuf_transfer) - screen->tex_transfer_destroy(rast->cbuf_transfer); - if (rast->zsbuf_transfer) screen->tex_transfer_destroy(rast->zsbuf_transfer); - rast->cbuf_transfer = NULL; rast->zsbuf_transfer = NULL; - rast->cbuf_map = NULL; rast->zsbuf_map = NULL; } @@ -161,8 +175,9 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const uint8_t *clear_color = arg.clear_color; - uint8_t *color_tile = rast->tasks[thread_index].tile.color; - + uint8_t **color_tile = rast->tasks[thread_index].tile.color; + unsigned i; + LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, clear_color[0], clear_color[1], @@ -172,14 +187,17 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, if 
(clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { - memset(color_tile, clear_color[0], TILE_SIZE * TILE_SIZE * 4); + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); + } } else { unsigned x, y, chan; - for (y = 0; y < TILE_SIZE; y++) - for (x = 0; x < TILE_SIZE; x++) - for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(color_tile, x, y, chan) = clear_color[chan]; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (y = 0; y < TILE_SIZE; y++) + for (x = 0; x < TILE_SIZE; x++) + for (chan = 0; chan < 4; ++chan) + TILE_PIXEL(color_tile[i], x, y, chan) = clear_color[chan]; } } @@ -214,28 +232,40 @@ void lp_rast_load_color( struct lp_rasterizer *rast, struct lp_rasterizer_task *task = &rast->tasks[thread_index]; const unsigned x = task->x; const unsigned y = task->y; - int w = TILE_SIZE; - int h = TILE_SIZE; + unsigned i; LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - if (x + w > rast->state.fb.width) - w -= x + w - rast->state.fb.width; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + struct pipe_transfer *transfer = rast->cbuf_transfer[i]; + int w = TILE_SIZE; + int h = TILE_SIZE; - if (y + h > rast->state.fb.height) - h -= y + h - rast->state.fb.height; + if (x >= transfer->width) + continue; - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); - - lp_tile_read_4ub(rast->cbuf_transfer->texture->format, - rast->tasks[thread_index].tile.color, - rast->cbuf_map, - rast->cbuf_transfer->stride, - x, y, - w, h); + if (y >= transfer->height) + continue; + /* XXX: require tile-size aligned render target dimensions: + */ + if (x + w > transfer->width) + w -= x + w - transfer->width; + + if (y + h > transfer->height) + h -= y + h - transfer->height; + + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); + + lp_tile_read_4ub(transfer->texture->format, + 
rast->tasks[thread_index].tile.color[i], + rast->cbuf_map[i], + transfer->stride, + x, y, + w, h); + } } @@ -313,8 +343,9 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, { const struct lp_rast_state *state = rast->tasks[thread_index].current_state; struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; - void *color; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; + unsigned i; unsigned ix, iy; int block_offset; @@ -336,14 +367,17 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, block_offset = ((iy/4)*(16*16) + (ix/4)*16); /* color buffer */ - color = tile->color + 4 * block_offset; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + color[i] = tile->color[i] + 4 * block_offset; /* depth buffer */ depth = tile->depth + block_offset; + + #ifdef DEBUG - assert(lp_check_alignment(depth, 16)); - assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(tile->depth, 16)); + assert(lp_check_alignment(tile->color[0], 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); assert(lp_check_alignment(inputs->step[0], 16)); @@ -360,8 +394,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, color, depth, c1, c2, c3, - inputs->step[0], inputs->step[1], inputs->step[2] - ); + inputs->step[0], inputs->step[1], inputs->step[2]); } @@ -377,29 +410,42 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, { const unsigned x = rast->tasks[thread_index].x; const unsigned y = rast->tasks[thread_index].y; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (x + w > rast->state.fb.width) - w -= x + w - rast->state.fb.width; + unsigned i; - if (y + h > rast->state.fb.height) - h -= y + h - rast->state.fb.height; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + struct pipe_transfer *transfer = rast->cbuf_transfer[i]; + int w = TILE_SIZE; + int h = TILE_SIZE; - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); + if (x >= transfer->width) + continue; - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d 
%dx%d\n", __FUNCTION__, - thread_index, x, y, w, h); + if (y >= transfer->height) + continue; - lp_tile_write_4ub(rast->cbuf_transfer->texture->format, - rast->tasks[thread_index].tile.color, - rast->cbuf_map, - rast->cbuf_transfer->stride, - x, y, - w, h); + /* XXX: require tile-size aligned render target dimensions: + */ + if (x + w > transfer->width) + w -= x + w - transfer->width; + + if (y + h > transfer->height) + h -= y + h - transfer->height; + + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); + + LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, + thread_index, x, y, w, h); + + lp_tile_write_4ub(transfer->texture->format, + rast->tasks[thread_index].tile.color[i], + rast->cbuf_map[i], + transfer->stride, + x, y, + w, h); + } } @@ -600,7 +646,7 @@ lp_rasterize_scene( struct lp_rasterizer *rast, /* no threading */ lp_rast_begin( rast, fb, - fb->cbufs[0]!= NULL, + fb->nr_cbufs != 0, /* always write color if cbufs present */ fb->zsbuf != NULL && write_depth ); lp_scene_bin_iter_begin( scene ); @@ -667,7 +713,7 @@ thread_func( void *init_data ) write_depth = rast->curr_scene->write_depth; lp_rast_begin( rast, fb, - fb->cbufs[0] != NULL, + fb->nr_cbufs != 0, fb->zsbuf != NULL && write_depth ); } @@ -738,7 +784,7 @@ struct lp_rasterizer * lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) { struct lp_rasterizer *rast; - unsigned i; + unsigned i, cbuf; rast = CALLOC_STRUCT(lp_rasterizer); if(!rast) @@ -750,7 +796,9 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { - rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) + rast->tasks[i].tile.color[cbuf] = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); rast->tasks[i].rast = rast; 
rast->tasks[i].thread_index = i; @@ -769,13 +817,14 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) */ void lp_rast_destroy( struct lp_rasterizer *rast ) { - unsigned i; + unsigned i, cbuf; util_unreference_framebuffer_state(&rast->state.fb); for (i = 0; i < Elements(rast->tasks); i++) { align_free(rast->tasks[i].tile.depth); - align_free(rast->tasks[i].tile.color); + for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) + align_free(rast->tasks[i].tile.color[cbuf]); } /* for synchronizing rasterization threads */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index cd72d7e69d8..5afdeab049c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -46,7 +46,7 @@ struct lp_rasterizer; */ struct lp_rast_tile { - uint8_t *color; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; uint32_t *depth; }; @@ -87,9 +87,9 @@ struct lp_rasterizer /* Framebuffer stuff */ struct pipe_screen *screen; - struct pipe_transfer *cbuf_transfer; + struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; struct pipe_transfer *zsbuf_transfer; - void *cbuf_map; + void *cbuf_map[PIPE_MAX_COLOR_BUFS]; void *zsbuf_map; struct { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 5cdcf4ecc98..74f3054864c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -155,26 +155,26 @@ begin_binning( struct setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (setup->fb.cbufs[0]) { + if (setup->fb.nr_cbufs) { if (setup->clear.flags & PIPE_CLEAR_COLOR) lp_scene_bin_everywhere( scene, - lp_rast_clear_color, - setup->clear.color ); + lp_rast_clear_color, + setup->clear.color ); else lp_scene_bin_everywhere( scene, - lp_rast_load_color, - lp_rast_arg_null() ); + lp_rast_load_color, + lp_rast_arg_null() ); } if (setup->fb.zsbuf) { if (setup->clear.flags & 
PIPE_CLEAR_DEPTHSTENCIL) lp_scene_bin_everywhere( scene, - lp_rast_clear_zstencil, - setup->clear.zstencil ); + lp_rast_clear_zstencil, + setup->clear.zstencil ); else lp_scene_bin_everywhere( scene, - lp_rast_load_zstencil, - lp_rast_arg_null() ); + lp_rast_load_zstencil, + lp_rast_arg_null() ); } LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 25d13536741..cb240cb6e55 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -67,10 +67,16 @@ struct lp_fragment_shader; struct lp_fragment_shader_variant_key { enum pipe_format zsbuf_format; + unsigned nr_cbufs; + struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + struct { + ubyte colormask; + } cbuf_blend[PIPE_MAX_COLOR_BUFS]; + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 293535387ab..01912d6ea2d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -327,7 +327,7 @@ generate_fs(struct llvmpipe_context *lp, const struct lp_build_interp_soa_context *interp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, - LLVMValueRef *color, + LLVMValueRef (*color)[4], LLVMValueRef depth_ptr, LLVMValueRef c0, LLVMValueRef c1, @@ -348,6 +348,7 @@ generate_fs(struct llvmpipe_context *lp, boolean early_depth_test; unsigned attrib; unsigned chan; + unsigned cbuf; assert(i < 4); @@ -364,9 +365,11 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_begin(flow); /* Declare the color and z variables */ - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - color[chan] = LLVMGetUndef(vec_type); - lp_build_flow_scope_declare(flow, &color[chan]); + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + color[cbuf][chan] = 
LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, &color[cbuf][chan]); + } } lp_build_flow_scope_declare(flow, &z); @@ -407,6 +410,7 @@ generate_fs(struct llvmpipe_context *lp, /* Alpha test */ /* XXX: should the alpha reference value be passed separately? */ + /* XXX: should only test the final assignment to alpha */ if(cbuf == 0 && chan == 3) { LLVMValueRef alpha = outputs[attrib][chan]; LLVMValueRef alpha_ref_value; @@ -416,9 +420,7 @@ generate_fs(struct llvmpipe_context *lp, &mask, alpha, alpha_ref_value); } - if(cbuf == 0) - color[chan] = outputs[attrib][chan]; - + color[cbuf][chan] = outputs[attrib][chan]; break; } @@ -539,7 +541,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; - LLVMValueRef color_ptr; + LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; LLVMBasicBlockRef block; @@ -549,12 +551,13 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; + LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; unsigned num_fs; unsigned i; unsigned chan; + unsigned cbuf; if (LP_DEBUG & DEBUG_JIT) { tgsi_dump(shader->base.tokens, 0); @@ -651,7 +654,7 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ - arg_types[6] = LLVMPointerType(blend_vec_type, 0); /* color */ + arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ arg_types[8] = LLVMInt32Type(); /* c0 */ arg_types[9] = 
LLVMInt32Type(); /* c1 */ @@ -667,6 +670,10 @@ generate_fragment(struct llvmpipe_context *lp, variant->function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + + /* XXX: need to propagate noalias down into color param now we are + * passing a pointer-to-pointer? + */ for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); @@ -677,7 +684,7 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr = LLVMGetParam(variant->function, 3); dadx_ptr = LLVMGetParam(variant->function, 4); dady_ptr = LLVMGetParam(variant->function, 5); - color_ptr = LLVMGetParam(variant->function, 6); + color_ptr_ptr = LLVMGetParam(variant->function, 6); depth_ptr = LLVMGetParam(variant->function, 7); c0 = LLVMGetParam(variant->function, 8); c1 = LLVMGetParam(variant->function, 9); @@ -692,7 +699,7 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); - lp_build_name(color_ptr, "color"); + lp_build_name(color_ptr_ptr, "color_ptr"); lp_build_name(depth_ptr, "depth"); lp_build_name(c0, "c0"); lp_build_name(c1, "c1"); @@ -721,8 +728,9 @@ generate_fragment(struct llvmpipe_context *lp, /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef out_color[NUM_CHANNELS]; + LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; + int cbuf; if(i != 0) lp_build_interp_soa_update(&interp, i); @@ -742,40 +750,50 @@ generate_fragment(struct llvmpipe_context *lp, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); - for(chan = 0; chan < NUM_CHANNELS; ++chan) - fs_out_color[chan][i] = out_color[chan]; + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) + for(chan = 0; chan < NUM_CHANNELS; ++chan) + fs_out_color[cbuf][chan][i] = 
out_color[cbuf][chan]; } sampler->destroy(sampler); - /* - * Convert the fs's output color and mask to fit to the blending type. + /* Loop over color outputs / color buffers to do blending. */ + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + LLVMValueRef color_ptr; + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - lp_build_conv(builder, fs_type, blend_type, - fs_out_color[chan], num_fs, - &blend_in_color[chan], 1); - lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]); + /* + * Convert the fs's output color and mask to fit to the blending type. + */ + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + lp_build_conv(builder, fs_type, blend_type, + fs_out_color[cbuf][chan], num_fs, + &blend_in_color[chan], 1); + lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); + } + lp_build_conv_mask(builder, fs_type, blend_type, + fs_mask, num_fs, + &blend_mask, 1); + + color_ptr = LLVMBuildLoad(builder, + LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), + ""); + lp_build_name(color_ptr, "color_ptr%d", cbuf); + + /* + * Blending. + */ + generate_blend(&key->blend, + builder, + blend_type, + context_ptr, + blend_mask, + blend_in_color, + color_ptr); } - lp_build_conv_mask(builder, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); - - /* - * Blending. 
- */ - - generate_blend(&key->blend, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr); - LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -940,21 +958,27 @@ make_variant_key(struct llvmpipe_context *lp, key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ - if(lp->framebuffer.cbufs[0]) { + if (lp->framebuffer.nr_cbufs) { + memcpy(&key->blend, lp->blend, sizeof key->blend); + } + + key->nr_cbufs = lp->framebuffer.nr_cbufs; + for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { const struct util_format_description *format_desc; unsigned chan; - memcpy(&key->blend, lp->blend, sizeof key->blend); - - format_desc = util_format_description(lp->framebuffer.cbufs[0]->format); + format_desc = util_format_description(lp->framebuffer.cbufs[i]->format); assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); - /* mask out color channels not present in the color buffer */ + /* mask out color channels not present in the color buffer. + * Should be simple to incorporate per-cbuf writemasks: + */ for(chan = 0; chan < 4; ++chan) { enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - if(swizzle > 4) - key->blend.colormask &= ~(1 << chan); + + if(swizzle <= UTIL_FORMAT_SWIZZLE_W) + key->cbuf_blend[i].colormask |= (1 << chan); } } -- cgit v1.2.3 From 16c1ad54bc5f3dc47a8a17cbc3724d913f5da005 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Jan 2010 18:41:42 +0000 Subject: llvmpipe: remove scissor cliprect derived state Was previously calculating the intersection of the scissor rectangle and the framebuffer dimensions. Rendering is already restricted to framebuffer dimensions by other means, so scissor testing (when implemented) can just use the scissor state directly. 
--- src/gallium/drivers/llvmpipe/lp_context.h | 3 -- src/gallium/drivers/llvmpipe/lp_state_derived.c | 38 ------------------------- 2 files changed, 41 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 1ede6a6a72f..5390a713699 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -96,9 +96,6 @@ struct llvmpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - /** Derived from scissor and surface bounds: */ - struct pipe_scissor_state cliprect; - /** The tiling engine */ struct setup_context *setup; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 78d046985b9..af02fcfec90 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -136,39 +136,6 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) -/** - * Recompute cliprect from scissor bounds, scissor enable and surface size. - */ -static void -compute_cliprect(struct llvmpipe_context *lp) -{ - /* LP_NEW_FRAMEBUFFER - */ - uint surfWidth = lp->framebuffer.width; - uint surfHeight = lp->framebuffer.height; - - /* LP_NEW_RASTERIZER - */ - if (lp->rasterizer->scissor) { - - /* LP_NEW_SCISSOR - * - * clip to scissor rect: - */ - lp->cliprect.minx = MAX2(lp->scissor.minx, 0); - lp->cliprect.miny = MAX2(lp->scissor.miny, 0); - lp->cliprect.maxx = MIN2(lp->scissor.maxx, surfWidth); - lp->cliprect.maxy = MIN2(lp->scissor.maxy, surfHeight); - } - else { - /* clip to surface bounds */ - lp->cliprect.minx = 0; - lp->cliprect.miny = 0; - lp->cliprect.maxx = surfWidth; - lp->cliprect.maxy = surfHeight; - } -} - /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. 
@@ -189,11 +156,6 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_VS)) compute_vertex_info( llvmpipe ); - if (llvmpipe->dirty & (LP_NEW_SCISSOR | - LP_NEW_RASTERIZER | - LP_NEW_FRAMEBUFFER)) - compute_cliprect(llvmpipe); - if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | LP_NEW_DEPTH_STENCIL_ALPHA | -- cgit v1.2.3 From 094525fb23127e7ca253d732207bbbbe00488a6b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 8 Jan 2010 13:55:32 +0000 Subject: llvmpipe: remove opencoded constant --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index fe34903cf32..5197dca8f99 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -287,10 +287,10 @@ do_triangle_ccw(struct setup_context *setup, } /* Bounding rectangle (in pixels) */ - tri->minx = (MIN3(x1, x2, x3) + 0xf) >> FIXED_ORDER; - tri->maxx = (MAX3(x1, x2, x3) + 0xf) >> FIXED_ORDER; - tri->miny = (MIN3(y1, y2, y3) + 0xf) >> FIXED_ORDER; - tri->maxy = (MAX3(y1, y2, y3) + 0xf) >> FIXED_ORDER; + tri->minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; + tri->maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; + tri->miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; + tri->maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; if (tri->miny == tri->maxy || tri->minx == tri->maxx) { -- cgit v1.2.3 From ad74ea286951634d49d500f2e5ce740072794fe2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 11 Jan 2010 12:02:53 +0000 Subject: st/mesa: early exit on error path Can't rely on asserts having any effect on flowcontrol for release builds. 
--- src/mesa/state_tracker/st_cb_condrender.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/mesa/state_tracker/st_cb_condrender.c b/src/mesa/state_tracker/st_cb_condrender.c index 780b40c2065..e2cd80b4046 100644 --- a/src/mesa/state_tracker/st_cb_condrender.c +++ b/src/mesa/state_tracker/st_cb_condrender.c @@ -69,6 +69,7 @@ st_BeginConditionalRender(GLcontext *ctx, struct gl_query_object *q, break; default: assert(0 && "bad mode in st_BeginConditionalRender"); + return; } pipe->render_condition(pipe, stq->pq, m); -- cgit v1.2.3 From 86f450060debebd66dd5fb72f83800d7634efeaa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 11 Jan 2010 12:06:51 +0000 Subject: llvmpipe: force constant interpolation of flatshade colors Nice speedup for gears. --- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 11 ++++++++++- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 1 + src/gallium/drivers/llvmpipe/lp_state.h | 6 +++--- src/gallium/drivers/llvmpipe/lp_state_derived.c | 1 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 7 ++++++- 5 files changed, 21 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index daedf40d558..a6acaead887 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -316,6 +316,7 @@ pos_update(struct lp_build_interp_soa_context *bld, int quad_index) void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, + boolean flatshade, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, @@ -358,7 +359,15 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, for( attrib = first; attrib <= last; ++attrib ) { bld->mask[1 + attrib] = mask; - bld->mode[1 + attrib] = decl->Declaration.Interpolate; + + /* XXX: have mesa set INTERP_CONSTANT in the fragment + * shader. 
+ */ + if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + flatshade) + bld->mode[1 + attrib] = TGSI_INTERPOLATE_CONSTANT; + else + bld->mode[1 + attrib] = decl->Declaration.Interpolate; } bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index e2b3bc1bf0b..ca958cdf343 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -79,6 +79,7 @@ struct lp_build_interp_soa_context void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, + boolean flatshade, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index cb240cb6e55..4c6747bb2b6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -66,12 +66,12 @@ struct lp_fragment_shader; struct lp_fragment_shader_variant_key { - enum pipe_format zsbuf_format; - unsigned nr_cbufs; - struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + enum pipe_format zsbuf_format; + unsigned nr_cbufs:8; + unsigned flatshade:1; struct { ubyte colormask; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index af02fcfec90..632cafa2e6e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -159,6 +159,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_RASTERIZER | LP_NEW_SAMPLER | LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 01912d6ea2d..7ce72027770 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -718,7 +718,10 @@ generate_fragment(struct llvmpipe_context *lp, generate_pos0(builder, x, y, &x0, &y0); - lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type, + lp_build_interp_soa_init(&interp, + shader->base.tokens, + key->flatshade, + builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x0, y0); @@ -958,6 +961,8 @@ make_variant_key(struct llvmpipe_context *lp, key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ + key->flatshade = lp->rasterizer->flatshade; + if (lp->framebuffer.nr_cbufs) { memcpy(&key->blend, lp->blend, sizeof key->blend); } -- cgit v1.2.3 From 46b5bd6cadd13f47c10aafe9194c90234db91a2a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jan 2010 12:59:36 -0700 Subject: llvmpipe: do the all-in test on the scalar c0 instead of vector c0 This still isn't faster, but committing it for posterity. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 45 ++++++++++++++---------------- 1 file changed, 21 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 7ce72027770..6816db4387f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -217,10 +217,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, struct lp_build_if_state ifctx; struct lp_type i32_type; LLVMTypeRef i32vec4_type, mask_type; - LLVMValueRef c0_vec, c1_vec, c2_vec; - - LLVMValueRef int_min_vec; LLVMValueRef not_draw_all; LLVMValueRef in_out_mask; @@ -238,21 +235,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, mask_type = LLVMIntType(32 * 4); - /* int_min_vec = {INT_MIN, INT_MIN, INT_MIN, INT_MIN} */ - int_min_vec = lp_build_int_const_scalar(i32_type, INT_MIN); - - - /* c0_vec = {c0, c0, c0, c0} - * Note that we emit this code four times but LLVM optimizes away - * three instances of it. 
- */ - c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); - c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); - c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); - lp_build_name(c0_vec, "edgeconst0vec"); - lp_build_name(c1_vec, "edgeconst1vec"); - lp_build_name(c2_vec, "edgeconst2vec"); - /* * Use a conditional here to do detailed pixel in/out testing. * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN} @@ -260,17 +242,19 @@ generate_tri_edge_mask(LLVMBuilderRef builder, flow = lp_build_flow_create(builder); lp_build_flow_scope_begin(flow); -#define OPTIMIZE_IN_OUT_TEST 0 + { +#define OPTIMIZE_IN_OUT_TEST 1 #if OPTIMIZE_IN_OUT_TEST - in_out_mask = lp_build_compare(builder, i32_type, PIPE_FUNC_EQUAL, c0_vec, int_min_vec); - lp_build_name(in_out_mask, "inoutmaskvec"); not_draw_all = LLVMBuildICmp(builder, - LLVMIntEQ, - LLVMBuildBitCast(builder, in_out_mask, mask_type, ""), - LLVMConstNull(mask_type), + LLVMIntNE, + c0, + LLVMConstInt(LLVMInt32Type(), INT_MIN, 0), ""); + in_out_mask = lp_build_int_const_scalar(i32_type, ~0); + + lp_build_flow_scope_declare(flow, &in_out_mask); lp_build_if(&ifctx, flow, builder, not_draw_all); @@ -280,6 +264,18 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef m0_vec, m1_vec, m2_vec; LLVMValueRef index, m; + /* c0_vec = {c0, c0, c0, c0} + * Note that we emit this code four times but LLVM optimizes away + * three instances of it. 
+ */ + c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); + c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); + c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); + lp_build_name(c0_vec, "edgeconst0vec"); + lp_build_name(c1_vec, "edgeconst1vec"); + lp_build_name(c2_vec, "edgeconst2vec"); + + index = LLVMConstInt(LLVMInt32Type(), i, 0); step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); @@ -305,6 +301,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_endif(&ifctx); #endif + } lp_build_flow_scope_end(flow); lp_build_flow_destroy(flow); -- cgit v1.2.3 From 3b5d84926847cf2008da4e2dc146090d0c1b5402 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jan 2010 13:16:00 -0700 Subject: llvmpipe: refactor generate_fragment() code This will make it easier to generate multiple versions of the fragment code per variant. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 138 ++++++++++++++++------------- 1 file changed, 76 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 6816db4387f..c4ca0f1d5e4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -515,13 +515,13 @@ generate_blend(const struct pipe_blend_state *blend, * pixels at at time. The block contains 2x2 quads. Each quad contains * 2x2 pixels. 
*/ -static struct lp_fragment_shader_variant * +static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, - const struct lp_fragment_shader_variant_key *key) + struct lp_fragment_shader_variant *variant) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); - struct lp_fragment_shader_variant *variant; + const struct lp_fragment_shader_variant_key *key = &variant->key; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; @@ -556,64 +556,6 @@ generate_fragment(struct llvmpipe_context *lp, unsigned chan; unsigned cbuf; - if (LP_DEBUG & DEBUG_JIT) { - tgsi_dump(shader->base.tokens, 0); - if(key->depth.enabled) { - debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); - debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); - debug_printf("depth.writemask = %u\n", key->depth.writemask); - } - if(key->alpha.enabled) { - debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); - debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); - } - if(key->blend.logicop_enable) { - debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); - } - else if(key->blend.blend_enable) { - debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); - } - debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); - for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if(key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - 
debug_printf(" .format = %s\n", - pf_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - debug_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s %s\n", - debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) - debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); - } - } - } - - variant = CALLOC_STRUCT(lp_fragment_shader_variant); - if(!variant) - return NULL; - - variant->shader = shader; - memcpy(&variant->key, key, sizeof *key); /* TODO: actually pick these based on the fs and color buffer * characteristics. 
*/ @@ -828,6 +770,78 @@ generate_fragment(struct llvmpipe_context *lp, variant->next = shader->variants; shader->variants = variant; +} + + +static struct lp_fragment_shader_variant * +generate_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, + const struct lp_fragment_shader_variant_key *key) +{ + struct lp_fragment_shader_variant *variant; + + if (LP_DEBUG & DEBUG_JIT) { + unsigned i; + + tgsi_dump(shader->base.tokens, 0); + if(key->depth.enabled) { + debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); + debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); + debug_printf("depth.writemask = %u\n", key->depth.writemask); + } + if(key->alpha.enabled) { + debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); + debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); + } + if(key->blend.logicop_enable) { + debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); + } + else if(key->blend.blend_enable) { + debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); + debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); + debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); + debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); + debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); + debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + } + debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if(key->sampler[i].format) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + pf_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + debug_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + 
key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); + } + } + } + + variant = CALLOC_STRUCT(lp_fragment_shader_variant); + if(!variant) + return NULL; + + variant->shader = shader; + memcpy(&variant->key, key, sizeof *key); + + generate_fragment(lp, shader, variant); return variant; } @@ -1008,7 +1022,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) } if(!variant) - variant = generate_fragment(lp, shader, &key); + variant = generate_variant(lp, shader, &key); shader->current = variant; -- cgit v1.2.3 From 9a10d14a441ca76c5c9ea8986a2eb4b5923a9b9f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jan 2010 15:30:17 -0700 Subject: llvmpipe: move, update comments --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c4ca0f1d5e4..8939e7e6223 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -204,15 +204,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef 
step1_ptr, /* ivec4 */ LLVMValueRef step2_ptr) /* ivec4 */ { - /* - c0_vec = splat(c0) - c1_vec = splat(c1) - c2_vec = splat(c2) - m0_vec = step0_ptr[i] > c0_vec - m1_vec = step1_ptr[i] > c1_vec - m2_vec = step2_ptr[i] > c2_vec - mask = m0_vec & m1_vec & m2_vec - */ struct lp_build_flow_context *flow; struct lp_build_if_state ifctx; struct lp_type i32_type; @@ -237,7 +228,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, /* * Use a conditional here to do detailed pixel in/out testing. - * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN} + * We only have to do this if c0 != INT_MIN. */ flow = lp_build_flow_create(builder); lp_build_flow_scope_begin(flow); @@ -245,7 +236,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, { #define OPTIMIZE_IN_OUT_TEST 1 #if OPTIMIZE_IN_OUT_TEST - + /* not_draw_all = (c0 != INT_MIN) */ not_draw_all = LLVMBuildICmp(builder, LLVMIntNE, c0, @@ -257,6 +248,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_flow_scope_declare(flow, &in_out_mask); + /* if (not_draw_all) {... 
*/ lp_build_if(&ifctx, flow, builder, not_draw_all); #endif { @@ -275,27 +267,24 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_name(c1_vec, "edgeconst1vec"); lp_build_name(c2_vec, "edgeconst2vec"); - + /* load step0vec, step1, step2 vec from memory */ index = LLVMConstInt(LLVMInt32Type(), i, 0); step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); - lp_build_name(step0_vec, "step0vec"); lp_build_name(step1_vec, "step1vec"); lp_build_name(step2_vec, "step2vec"); + /* m0_vec = step0_ptr[i] > c0_vec */ m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); + /* in_out_mask = m0_vec & m1_vec & m2_vec */ m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); in_out_mask = LLVMBuildAnd(builder, m, m2_vec, ""); lp_build_name(in_out_mask, "inoutmaskvec"); - - /* This is the initial alive/dead pixel mask. Additional bits will get cleared - * when the Z test fails, etc. - */ } #if OPTIMIZE_IN_OUT_TEST lp_build_endif(&ifctx); @@ -305,6 +294,10 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_flow_scope_end(flow); lp_build_flow_destroy(flow); + /* This is the initial alive/dead pixel mask for a quad of four pixels. + * It's an int[4] vector with each word set to 0 or ~0. + * Words will get cleared when pixels fail the Z test, etc. + */ *mask = in_out_mask; } -- cgit v1.2.3 From 5cf46309698dc53ee8c383b9510ee21a3d74015e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jan 2010 15:30:54 -0700 Subject: llvmpipe: disable the all in/out test code for now It's still faster not to try to special case the "all pixels are known to be inside the triangle" case. 
--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 8939e7e6223..2db8ab2566d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -234,7 +234,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_flow_scope_begin(flow); { -#define OPTIMIZE_IN_OUT_TEST 1 +#define OPTIMIZE_IN_OUT_TEST 0 #if OPTIMIZE_IN_OUT_TEST /* not_draw_all = (c0 != INT_MIN) */ not_draw_all = LLVMBuildICmp(builder, -- cgit v1.2.3 From a32e9b2a2d62d64b9c22c4c7af9e0f009189b0a2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 10:11:36 -0700 Subject: llvmpipe: remove unused #define --- src/gallium/drivers/llvmpipe/lp_bld_flow.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 693742ff859..bc831389085 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -41,9 +41,6 @@ #define LP_BUILD_FLOW_MAX_VARIABLES 32 #define LP_BUILD_FLOW_MAX_DEPTH 32 -#define LP_BUILD_IF_MAX_VARIABLES 8 - - /** * Enumeration of all possible flow constructs. 
*/ -- cgit v1.2.3 From 4061ca02dd837950201a9ada462f944ae25deeb5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 13:01:32 -0700 Subject: llvmpipe: silence unused var warnings --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2db8ab2566d..c6f58018762 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -204,12 +204,15 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef step1_ptr, /* ivec4 */ LLVMValueRef step2_ptr) /* ivec4 */ { - struct lp_build_flow_context *flow; +#define OPTIMIZE_IN_OUT_TEST 0 +#if OPTIMIZE_IN_OUT_TEST struct lp_build_if_state ifctx; + LLVMValueRef not_draw_all; +#endif + struct lp_build_flow_context *flow; struct lp_type i32_type; LLVMTypeRef i32vec4_type, mask_type; LLVMValueRef c0_vec, c1_vec, c2_vec; - LLVMValueRef not_draw_all; LLVMValueRef in_out_mask; assert(i < 4); @@ -234,7 +237,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_flow_scope_begin(flow); { -#define OPTIMIZE_IN_OUT_TEST 0 #if OPTIMIZE_IN_OUT_TEST /* not_draw_all = (c0 != INT_MIN) */ not_draw_all = LLVMBuildICmp(builder, -- cgit v1.2.3 From de10168a462f57ead41800ea135476bb5ae8c678 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:06:19 -0700 Subject: llvmpipe: added lp_scene_is_empty() --- src/gallium/drivers/llvmpipe/lp_scene.c | 22 ++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_scene.h | 2 ++ 2 files changed, 24 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 774a1fecd71..70d5847d8ec 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -66,6 +66,28 @@ lp_scene_init(struct lp_scene *scene) } +/** + * Check if the scene's bins are all empty. 
+ * For debugging purposes. + */ +boolean +lp_scene_is_empty(struct lp_scene *scene ) +{ + unsigned x, y; + + for (y = 0; y < TILES_Y; y++) { + for (x = 0; x < TILES_X; x++) { + const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + const struct cmd_block_list *list = &bin->commands; + if (list->head != list->tail || list->head->count > 0) { + return FALSE; + } + } + } + return TRUE; +} + + /** * Set scene to empty state. */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 796fc516ccc..72557277851 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -133,6 +133,8 @@ void lp_scene_destroy(struct lp_scene *scene); void lp_scene_init(struct lp_scene *scene); +boolean lp_scene_is_empty(struct lp_scene *scene ); + void lp_scene_reset(struct lp_scene *scene ); void lp_scene_free_bin_data(struct lp_scene *scene); -- cgit v1.2.3 From 214ffad01598c8780417b9fa9df75e951c8ac049 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:08:07 -0700 Subject: llvmpipe: clamp maxx,maxy to framebuffer size (in terms of tiles) In some corner cases the right-most / bottom-most vertex can be right on the edge of the framebuffer. Because the maxx, maxy vals are computed with a series of float/int, pixel/tile transformations we can end up with maxx >= scene->x_tiles or maxy >= scene->y_tiles. This leads to putting data into bins that never get processed, or reset. This becomes stale data that can lead to segfaults. Clamping fixes this. 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 5197dca8f99..9248125de8b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -389,6 +389,11 @@ do_triangle_ccw(struct setup_context *setup, maxx = tri->maxx / TILE_SIZE; maxy = tri->maxy / TILE_SIZE; + /* Clamp maxx, maxy to framebuffer size + */ + maxx = MIN2(maxx, scene->tiles_x - 1); + maxy = MIN2(maxy, scene->tiles_y - 1); + /* Determine which tile(s) intersect the triangle's bounding box */ if (miny == maxy && minx == maxx) -- cgit v1.2.3 From c560b97b17a009f5ea8423523cd3a70fe7b506e9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:11:40 -0700 Subject: llvmpipe: assert that we're putting data into a valid bin --- src/gallium/drivers/llvmpipe/lp_scene.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 72557277851..b59b6870026 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -227,6 +227,9 @@ lp_scene_bin_command( struct lp_scene *scene, struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); struct cmd_block_list *list = &bin->commands; + assert(x < scene->tiles_x); + assert(y < scene->tiles_y); + if (list->tail->count == CMD_BLOCK_MAX) { lp_bin_new_cmd_block( list ); } -- cgit v1.2.3 From ec9cfac7682268a2d81d6edaf81f8fbb770e17ed Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:12:07 -0700 Subject: llvmpipe: debug checks: make sure scene is empty at key points --- src/gallium/drivers/llvmpipe/lp_scene.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 70d5847d8ec..7ec3206b817 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -116,6 +116,8 @@ lp_scene_reset(struct lp_scene *scene ) } } + assert(lp_scene_is_empty(scene)); + /* Free all but last binned data block: */ { @@ -163,6 +165,8 @@ void lp_scene_set_framebuffer_size( struct lp_scene *scene, unsigned width, unsigned height ) { + assert(lp_scene_is_empty(scene)); + scene->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; scene->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; } -- cgit v1.2.3 From 7e4c75c040bfd93fafb3a3ebbda25db8bd948e18 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Jan 2010 17:12:49 -0700 Subject: llvmpipe: fix indentation, comment typo --- src/gallium/drivers/llvmpipe/lp_scene.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 7ec3206b817..45d54462678 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -163,7 +163,7 @@ lp_scene_free_bin_data(struct lp_scene *scene) void lp_scene_set_framebuffer_size( struct lp_scene *scene, - unsigned width, unsigned height ) + unsigned width, unsigned height ) { assert(lp_scene_is_empty(scene)); @@ -259,8 +259,8 @@ lp_replace_last_command_arg( struct cmd_bin *bin, */ void lp_scene_bin_state_command( struct lp_scene *scene, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) { unsigned i, j; for (i = 0; i < scene->tiles_x; i++) { @@ -303,7 +303,7 @@ lp_scene_bin_iter_begin( struct lp_scene *scene ) /** - * Return point to next bin to be rendered. + * Return pointer to next bin to be rendered. * The lp_scene::curr_x and ::curr_y fields will be advanced. * Multiple rendering threads will call this function to get a chunk * of work (a bin) to work on. 
-- cgit v1.2.3 From da45f49cc63fff06513dc28d9616084fc81798d4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 14:41:02 +0000 Subject: llvmpipe: quick hack to short-circuit empty bins --- src/gallium/drivers/llvmpipe/lp_rast.c | 23 ++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup.c | 4 +++- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 38c27b90e35..4c13d4d80b5 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -576,6 +576,26 @@ rasterize_bin( struct lp_rasterizer *rast, lp_rast_end_tile( rast, thread_index ); } +static boolean +is_empty_bin( struct lp_rasterizer *rast, + const struct cmd_bin *bin ) +{ + const struct cmd_block *head = bin->commands.head; + int i; + + if (head->next != NULL || + head->count > PIPE_MAX_COLOR_BUFS + 1) + return FALSE; + + for (i = 0; i < head->count; i++) + if (head->cmd[i] != lp_rast_load_color && + head->cmd[i] != lp_rast_load_zstencil) + return FALSE; + + return TRUE; +} + + /** * Rasterize/execute all bins within a scene. 
@@ -606,7 +626,8 @@ rasterize_scene( struct lp_rasterizer *rast, assert(scene); while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { - rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + if (!is_empty_bin( rast, bin )) + rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); } } #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 74f3054864c..38ea0c663f1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -153,7 +153,9 @@ begin_binning( struct setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s color: %s depth: %s\n", __FUNCTION__, + (setup->clear.flags & PIPE_CLEAR_COLOR) ? "clear": "load", + (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) ? "clear": "load"); if (setup->fb.nr_cbufs) { if (setup->clear.flags & PIPE_CLEAR_COLOR) -- cgit v1.2.3 From f4b29e6ad38939318ce233ad28c70a608e7db0bd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 15:49:24 +0000 Subject: llvmpipe: improve empty-bin test We emit at most two clear packets (color and z respectively). 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 4c13d4d80b5..9606418a375 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -584,7 +584,7 @@ is_empty_bin( struct lp_rasterizer *rast, int i; if (head->next != NULL || - head->count > PIPE_MAX_COLOR_BUFS + 1) + head->count > 2) return FALSE; for (i = 0; i < head->count; i++) -- cgit v1.2.3 From db83ad4b4353ea6f9c755f18bf1455ea78b5bf12 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 16:29:39 +0000 Subject: llvmpipe: improve empty-bin test further Remove unused param, add comments. Thanks to Brian for review. --- src/gallium/drivers/llvmpipe/lp_rast.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 9606418a375..0a8d730580a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -576,13 +576,23 @@ rasterize_bin( struct lp_rasterizer *rast, lp_rast_end_tile( rast, thread_index ); } +/* An empty bin is one that just loads the contents of the tile and + * stores them again unchanged. This typically happens when bins have + * been flushed for some reason in the middle of a frame, or when + * incremental updates are being made to a render target. + * + * Try to avoid doing pointless work in this case. + */ static boolean -is_empty_bin( struct lp_rasterizer *rast, - const struct cmd_bin *bin ) +is_empty_bin( const struct cmd_bin *bin ) { const struct cmd_block *head = bin->commands.head; int i; + /* We emit at most two load-tile commands at the start of the first + * command block. If there are more than two commands in the + * block, we know that the bin is non-empty. 
+ */ if (head->next != NULL || head->count > 2) return FALSE; @@ -626,7 +636,7 @@ rasterize_scene( struct lp_rasterizer *rast, assert(scene); while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { - if (!is_empty_bin( rast, bin )) + if (!is_empty_bin( bin )) rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); } } -- cgit v1.2.3 From 212f3a6cb3cb49127a6e5588553fe2d327f46563 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 09:29:43 -0700 Subject: llvmpipe: tweak subpixel_snap() arithmetic This adjustment fixes some rasterization differences between llvmpipe and softpipe (and other renderers). --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 9248125de8b..575265b0f50 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -237,7 +237,7 @@ static void setup_tri_coefficients( struct setup_context *setup, static inline int subpixel_snap( float a ) { - return util_iround(FIXED_ONE * a); + return util_iround(FIXED_ONE * a - (FIXED_ONE / 2)); } -- cgit v1.2.3 From 4439aab7b73c235b64df60f9f62fda3492dbfdc5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 09:31:36 -0700 Subject: llvmpipe: comments and LLVMValueRef naming --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 2df86dd32e5..54b31befe6d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -874,6 +874,9 @@ lp_build_iround(struct lp_build_context *bld, } +/** + * Convert float[] to int[] with floor(). 
+ */ LLVMValueRef lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef a) @@ -900,6 +903,7 @@ lp_build_ifloor(struct lp_build_context *bld, sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + lp_build_name(sign, "floor.sign"); /* offset = -0.99999(9)f */ offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); @@ -908,11 +912,14 @@ lp_build_ifloor(struct lp_build_context *bld, /* offset = a < 0 ? -0.99999(9)f : 0.0f */ offset = LLVMBuildAnd(bld->builder, offset, sign, ""); offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); + lp_build_name(offset, "floor.offset"); res = LLVMBuildAdd(bld->builder, a, offset, ""); + lp_build_name(res, "floor.res"); } res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + lp_build_name(res, "floor"); return res; } -- cgit v1.2.3 From 0bb5c3060f8784d6d6828b1455e736cd8f6416cb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 09:32:10 -0700 Subject: llvmpipe: added debug warning --- src/gallium/drivers/llvmpipe/lp_texture.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 2462378152a..a953e8845a0 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -177,6 +177,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, return &lpt->base; #else + debug_printf("llvmpipe_texture_blanket() not implemented!"); return NULL; #endif } -- cgit v1.2.3 From 95ee14f147e713bd132dc56a1151232957752c90 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 16:52:17 +0000 Subject: llvmpipe: implement lp_rast_load_zstencil Load zbuffer contents for binned scenes that don't start with a clear and which have a bound 
zbuffer. --- src/gallium/drivers/llvmpipe/lp_rast.c | 36 ++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 0a8d730580a..7753f9bb3f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -269,6 +269,23 @@ void lp_rast_load_color( struct lp_rasterizer *rast, } +static void +lp_tile_read_z32(uint32_t *tile, + const uint8_t *map, + unsigned map_stride, + unsigned x0, unsigned y0, unsigned w, unsigned h) +{ + unsigned x, y; + const uint8_t *map_row = map + y0*map_stride; + for (y = 0; y < h; ++y) { + const uint32_t *map_pixel = (uint32_t *)(map_row + x0*4); + for (x = 0; x < w; ++x) { + *tile++ = *map_pixel++; + } + map_row += map_stride; + } +} + /** * Load tile z/stencil from the framebuffer surface. * This is a bin command called during bin processing. @@ -277,9 +294,24 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; - /* call u_tile func to load depth (and stencil?) 
from surface */ + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; + + LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + + assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); + lp_tile_read_z32(rast->tasks[thread_index].tile.depth, + rast->zsbuf_map, + rast->zsbuf_transfer->stride, + x, y, w, h); } -- cgit v1.2.3 From 4231006e29cbf9fb54c72acf35009f3b18fe62ab Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 13 Jan 2010 20:14:04 +0000 Subject: llvmpipe: add bin debugger Adjust definition of empty_bin according to what's actually in empty bins. We often have a state packet before/after load commands. Still need to do something about the fence packets. --- src/gallium/drivers/llvmpipe/lp_rast.c | 57 +++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 7753f9bb3f6..6c7ece9fdbf 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -608,6 +608,44 @@ rasterize_bin( struct lp_rasterizer *rast, lp_rast_end_tile( rast, thread_index ); } + +#define RAST(x) { lp_rast_##x, #x } + +static struct { + lp_rast_cmd cmd; + const char *name; +} cmd_names[] = +{ + RAST(load_color), + RAST(load_zstencil), + RAST(clear_color), + RAST(clear_zstencil), + RAST(triangle), + RAST(shade_tile), + RAST(set_state), + RAST(fence), +}; + +static void +debug_bin( const struct cmd_bin *bin ) +{ + const struct cmd_block *head = bin->commands.head; + int i, j; + + for (i = 0; i < head->count; i++) { + debug_printf("%d: ", i); + for (j = 0; j < Elements(cmd_names); j++) { + if (head->cmd[i] == cmd_names[j].cmd) { + debug_printf("%s\n", cmd_names[j].name); + break; + } + } + if (j == Elements(cmd_names)) + debug_printf("...other\n"); + } + +} + /* An empty bin is one that just loads the contents of the tile and * stores them again unchanged. 
This typically happens when bins have * been flushed for some reason in the middle of a frame, or when @@ -620,19 +658,28 @@ is_empty_bin( const struct cmd_bin *bin ) { const struct cmd_block *head = bin->commands.head; int i; - + + if (0) + debug_bin(bin); + /* We emit at most two load-tile commands at the start of the first - * command block. If there are more than two commands in the - * block, we know that the bin is non-empty. + * command block. In addition we seem to emit a couple of + * set-state commands even in empty bins. + * + * As a heuristic, if a bin has more than 4 commands, consider it + * non-empty. */ if (head->next != NULL || - head->count > 2) + head->count > 4) { return FALSE; + } for (i = 0; i < head->count; i++) if (head->cmd[i] != lp_rast_load_color && - head->cmd[i] != lp_rast_load_zstencil) + head->cmd[i] != lp_rast_load_zstencil && + head->cmd[i] != lp_rast_set_state) { return FALSE; + } return TRUE; } -- cgit v1.2.3 From a1acbff299c444913418e65da473745cd901a2db Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 13 Jan 2010 21:51:47 +0000 Subject: llvmpipe: Reset the bin when shading a whole tile with an opaque shader. 
--- src/gallium/drivers/llvmpipe/lp_rast.h | 2 ++ src/gallium/drivers/llvmpipe/lp_scene.c | 33 +++++++++++++++++------------ src/gallium/drivers/llvmpipe/lp_scene.h | 4 ++++ src/gallium/drivers/llvmpipe/lp_setup.c | 4 +++- src/gallium/drivers/llvmpipe/lp_setup.h | 3 ++- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 6 ++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 12 ++++++++++- 7 files changed, 48 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 46e22f69a61..d926adb6b22 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -68,6 +68,8 @@ struct lp_rast_state { * the tile color/z/stencil data somehow: */ lp_jit_frag_func jit_function; + + boolean opaque; }; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 45d54462678..967d666bb46 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -88,6 +88,25 @@ lp_scene_is_empty(struct lp_scene *scene ) } +void +lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + struct cmd_block_list *list = &bin->commands; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->count = 0; +} + + /** * Set scene to empty state. 
*/ @@ -100,19 +119,7 @@ lp_scene_reset(struct lp_scene *scene ) */ for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - struct cmd_block_list *list = &bin->commands; - struct cmd_block *block; - struct cmd_block *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); - } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->count = 0; + lp_scene_bin_reset(scene, i, j); } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index b59b6870026..4b6527d67c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -215,6 +215,10 @@ lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y) } +/** Remove all commands from a bin */ +void +lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y); + /* Add a command to bin[x][y]. */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 38ea0c663f1..61b968c49f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -364,12 +364,14 @@ lp_setup_set_fs_inputs( struct setup_context *setup, void lp_setup_set_fs_function( struct setup_context *setup, - lp_jit_frag_func jit_function ) + lp_jit_frag_func jit_function, + boolean opaque ) { LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function); /* FIXME: reference count */ setup->fs.current.jit_function = jit_function; + setup->fs.current.opaque = opaque; setup->dirty |= LP_SETUP_NEW_FS; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index bf12cb85271..bac7d73e8d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -97,7 +97,8 @@ lp_setup_set_fs_inputs( struct setup_context *setup, void lp_setup_set_fs_function( struct 
setup_context *setup, - lp_jit_frag_func jit_function ); + lp_jit_frag_func jit_function, + boolean opaque ); void lp_setup_set_fs_constants(struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 575265b0f50..0f5b25b725c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -459,6 +459,12 @@ do_triangle_ccw(struct setup_context *setup, { in = 1; /* triangle covers the whole tile- shade whole tile */ + if(setup->fs.current.opaque) { + lp_scene_bin_reset( scene, x, y ); + lp_scene_bin_command( scene, x, y, + lp_rast_set_state, + lp_rast_arg_state(setup->fs.stored) ); + } lp_scene_bin_command( scene, x, y, lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c6f58018762..1ed9a2f5bfa 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -1005,6 +1005,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; + boolean opaque; make_variant_key(lp, shader, &key); @@ -1021,6 +1022,15 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) shader->current = variant; + /* TODO: put this in the variant */ + opaque = !key.blend.logicop_enable && + !key.blend.blend_enable && + !key.alpha.enabled && + !key.depth.enabled && + !shader->info.uses_kill + ? TRUE : FALSE; + lp_setup_set_fs_function(lp->setup, - shader->current->jit_function); + shader->current->jit_function, + opaque); } -- cgit v1.2.3 From 7df4c88088ecf34764c558b4f7fe7ef6c82327bb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 13 Jan 2010 22:07:24 +0000 Subject: llvmpipe: Opaque shader implies complete colormask too. 
--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 1ed9a2f5bfa..ea5868dee18 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -1023,8 +1023,10 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) shader->current = variant; /* TODO: put this in the variant */ + /* TODO: most of these can be relaxed, in particular the colormask */ opaque = !key.blend.logicop_enable && !key.blend.blend_enable && + key.blend.colormask == 0xf && !key.alpha.enabled && !key.depth.enabled && !shader->info.uses_kill -- cgit v1.2.3 From ddb94661a43fe50a0a058a56b05c65ee0cc204d9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 10:22:02 -0700 Subject: llvmpipe: minor comment upgrades --- src/gallium/drivers/llvmpipe/lp_setup_context.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a1808fcd4c0..66654ec5e71 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -75,7 +75,7 @@ struct setup_context */ struct draw_stage *vbuf; struct lp_rasterizer *rast; - struct lp_scene *scene; /**< current scene */ + struct lp_scene *scene; /**< current scene being built */ struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ boolean flatshade_first; @@ -116,7 +116,7 @@ struct setup_context uint8_t *stored; } blend_color; - unsigned dirty; /**< bitmask of LP_SETUP_x bits */ + unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ void (*point)( struct setup_context *, const float (*v0)[4]); -- cgit v1.2.3 From 9be1feacf2dbd36fa9fb65b9932a74f04a7d9cca Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 10:57:44 -0700 Subject: llvmpipe: added 
llvmpipe_texture_const() --- src/gallium/drivers/llvmpipe/lp_texture.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 00a20763e43..e37ef6059a2 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -73,6 +73,13 @@ llvmpipe_texture(struct pipe_texture *pt) return (struct llvmpipe_texture *) pt; } +static INLINE const struct llvmpipe_texture * +llvmpipe_texture_const(const struct pipe_texture *pt) +{ + return (const struct llvmpipe_texture *) pt; +} + + static INLINE struct llvmpipe_transfer * llvmpipe_transfer(struct pipe_transfer *pt) { -- cgit v1.2.3 From a36395d4875f5c416f1b1a6d2ed7f933e40c379c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 10:58:13 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_state_derived.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 632cafa2e6e..2c349fdb1d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -135,9 +135,11 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) } - - -/* Hopefully this will remain quite simple, otherwise need to pull in +/** + * Handle state changes. + * Called just prior to drawing anything (pipe::draw_arrays(), etc). + * + * Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. 
*/ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) -- cgit v1.2.3 From 4769328fe1ddaa1882dddbaad21239d5fdcddf19 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 10:58:36 -0700 Subject: llvmpipe: comments --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ea5868dee18..26a2d6cc239 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -999,6 +999,10 @@ make_variant_key(struct llvmpipe_context *lp, } +/** + * Update fragment state. This is called just prior to drawing + * something when some fragment-related state has changed. + */ void llvmpipe_update_fs(struct llvmpipe_context *lp) { -- cgit v1.2.3 From 592e40aa7bdbda5a09becb898300393d599c033a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 13:43:58 -0700 Subject: llvmpipe: added scene functions for texture reference counting When a texture is used in the scene we add it to a list of texture references. The lp_scene_is_textured_referenced() function tells us if a texture is referenced by the scene. 
--- src/gallium/drivers/llvmpipe/lp_scene.c | 48 +++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_scene.h | 16 +++++++++++ 2 files changed, 64 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 967d666bb46..191122de7db 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -27,6 +27,7 @@ #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_simple_list.h" #include "lp_scene.h" @@ -62,6 +63,8 @@ lp_scene_init(struct lp_scene *scene) scene->data.head = scene->data.tail = CALLOC_STRUCT(data_block); + make_empty_list(&scene->textures); + pipe_mutex_init(scene->mutex); } @@ -140,6 +143,18 @@ lp_scene_reset(struct lp_scene *scene ) list->head = list->tail; list->head->used = 0; } + + /* Release texture refs + */ + { + struct texture_ref *ref, *next, *ref_list = &scene->textures; + for (ref = ref_list->next; ref != ref_list; ref = next) { + next = next_elem(ref); + pipe_texture_reference(&ref->texture, NULL); + FREE(ref); + } + make_empty_list(ref_list); + } } @@ -229,6 +244,39 @@ lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) } +/** + * Add a reference to a texture by the scene. + */ +void +lp_scene_texture_reference( struct lp_scene *scene, + struct pipe_texture *texture ) +{ + struct texture_ref *ref = CALLOC_STRUCT(texture_ref); + if (ref) { + struct texture_ref *ref_list = &scene->textures; + pipe_texture_reference(&ref->texture, texture); + insert_at_tail(ref_list, ref); + } +} + + +/** + * Does this scene have a reference to the given texture? 
+ */ +boolean +lp_scene_is_textured_referenced( const struct lp_scene *scene, + const struct pipe_texture *texture ) +{ + const struct texture_ref *ref_list = &scene->textures; + const struct texture_ref *ref; + foreach (ref, ref_list) { + if (ref->texture == texture) + return TRUE; + } + return FALSE; +} + + /** * Return last command in the bin */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 4b6527d67c7..86facf8eac2 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -97,6 +97,13 @@ struct data_block_list { }; +/** List of texture references */ +struct texture_ref { + struct pipe_texture *texture; + struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */ +}; + + /** * All bins and bin data are contained here. * Per-bin data goes into the 'tile' bins. @@ -112,6 +119,9 @@ struct lp_scene { /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; + /** list of textures referenced by the scene commands */ + struct texture_ref textures; + boolean write_depth; /** @@ -150,6 +160,12 @@ unsigned lp_scene_data_size( const struct lp_scene *scene ); unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ); +void lp_scene_texture_reference( struct lp_scene *scene, + struct pipe_texture *texture ); + +boolean lp_scene_is_textured_referenced( const struct lp_scene *scene, + const struct pipe_texture *texture ); + /** * Allocate space for a command/data in the bin's data buffer. 
-- cgit v1.2.3 From a27b12171d84c6e731af08f48a657c377f8549ba Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 13:54:46 -0700 Subject: llvmpipe: add scene texture referencing code --- src/gallium/drivers/llvmpipe/lp_setup.c | 29 +++++++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup.h | 2 +- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 61b968c49f5..bac2db92bae 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -432,25 +432,22 @@ lp_setup_set_vertex_info( struct setup_context *setup, } +/** + * Called during state validation when LP_NEW_TEXTURE is set. + */ void lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture) { - struct pipe_texture *dummy; unsigned i; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - assert(num <= PIPE_MAX_SAMPLERS); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { struct pipe_texture *tex = i < num ? texture[i] : NULL; - /* FIXME: hold on to the reference */ - dummy = NULL; - pipe_texture_reference(&dummy, tex); - if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); struct lp_jit_texture *jit_tex; @@ -463,21 +460,37 @@ lp_setup_set_sampler_textures( struct setup_context *setup, else /* FIXME: map the rendertarget */ assert(0); + + /* the scene references this texture */ + { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + lp_scene_texture_reference(scene, tex); + } } } setup->dirty |= LP_SETUP_NEW_FS; } + +/** + * Is the given texture referenced in the setup module's current scene? 
+ */ boolean -lp_setup_is_texture_referenced( struct setup_context *setup, +lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ) { - /* FIXME */ + const struct lp_scene *scene = setup->scene; + if (scene && lp_scene_is_textured_referenced(scene, texture)) { + return PIPE_REFERENCED_FOR_READ; + } return PIPE_UNREFERENCED; } +/** + * Called by vbuf code when we're about to draw something. + */ void lp_setup_update_state( struct setup_context *setup ) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index bac7d73e8d2..429abeba43b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -118,7 +118,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture); boolean -lp_setup_is_texture_referenced( struct setup_context *setup, +lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ); void -- cgit v1.2.3 From d59fe448967addb3025d7df90888ff950e03a343 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 14:51:26 -0700 Subject: llvmpipe: check for texture usage in all scenes --- src/gallium/drivers/llvmpipe/lp_setup.c | 20 ++++++++++---------- src/gallium/drivers/llvmpipe/lp_setup_context.h | 6 ++++++ 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index bac2db92bae..8193b107d9d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -50,10 +50,6 @@ #include "draw/draw_vbuf.h" -/** XXX temporary value, temporary here */ -#define MAX_SCENES 2 - - static void set_scene_state( struct setup_context *, unsigned ); @@ -474,15 +470,19 @@ lp_setup_set_sampler_textures( struct setup_context *setup, /** - * Is the given texture referenced in the setup module's current 
scene? + * Is the given texture referenced by any scene? + * Note: we have to check all scenes including any scenes currently + * being rendered and the current scene being built. */ boolean lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ) { - const struct lp_scene *scene = setup->scene; - if (scene && lp_scene_is_textured_referenced(scene, texture)) { - return PIPE_REFERENCED_FOR_READ; + unsigned i; + for (i = 0; i < Elements(setup->scenes); i++) { + if (lp_scene_is_textured_referenced(setup->scenes[i], texture)) { + return PIPE_REFERENCED_FOR_READ; + } } return PIPE_UNREFERENCED; } @@ -645,8 +645,8 @@ lp_setup_create( struct pipe_screen *screen, /* create some empty scenes */ for (i = 0; i < MAX_SCENES; i++) { - struct lp_scene *scene = lp_scene_create(); - lp_scene_enqueue(setup->empty_scenes, scene); + setup->scenes[i] = lp_scene_create(); + lp_scene_enqueue(setup->empty_scenes, setup->scenes[i]); } setup->triangle = first_triangle; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 66654ec5e71..e6f6f0e0bbb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -50,6 +50,11 @@ struct lp_scene_queue; +/** Max number of scenes */ +#define MAX_SCENES 2 + + + /** * Point/line/triangle setup context. 
* Note: "stored" below indicates data which is stored in the bins, @@ -75,6 +80,7 @@ struct setup_context */ struct draw_stage *vbuf; struct lp_rasterizer *rast; + struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */ struct lp_scene *scene; /**< current scene being built */ struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ -- cgit v1.2.3 From db58192cfb63cbb7b1d84e7ae7429799ce888164 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 15:01:35 -0700 Subject: llvmpipe: re-get scene pointer after flushing --- src/gallium/drivers/llvmpipe/lp_setup.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 8193b107d9d..4f77d04ca53 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -249,6 +249,9 @@ lp_setup_bind_framebuffer( struct setup_context *setup, set_scene_state( setup, SETUP_FLUSHED ); + /* re-get scene pointer, may have a new scene after flushing */ + scene = lp_setup_get_current_scene(setup); + util_copy_framebuffer_state(&setup->fb, fb); lp_scene_set_framebuffer_size(scene, setup->fb.width, setup->fb.height); -- cgit v1.2.3 From 0b279c5382da021a71cdc8ed3afa09983817539c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 15:03:42 -0700 Subject: llvmpipe: indentation fixes --- src/gallium/drivers/llvmpipe/lp_setup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 4f77d04ca53..11b1b5f319f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -292,13 +292,13 @@ lp_setup_clear( struct setup_context *setup, */ if (flags & PIPE_CLEAR_COLOR) lp_scene_bin_everywhere( scene, - lp_rast_clear_color, - setup->clear.color ); + lp_rast_clear_color, + setup->clear.color ); if (setup->clear.flags & 
PIPE_CLEAR_DEPTHSTENCIL) lp_scene_bin_everywhere( scene, - lp_rast_clear_zstencil, - setup->clear.zstencil ); + lp_rast_clear_zstencil, + setup->clear.zstencil ); } else { /* Put ourselves into the 'pre-clear' state, specifically to try -- cgit v1.2.3 From 12872774461a84f0a7c272aff5aac5e30a78a7c2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 15:30:42 -0700 Subject: llvmpipe: also check render target textures in lp_setup_is_texture_referenced() --- src/gallium/drivers/llvmpipe/lp_setup.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 11b1b5f319f..ce006bf618e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -487,6 +487,15 @@ lp_setup_is_texture_referenced( const struct setup_context *setup, return PIPE_REFERENCED_FOR_READ; } } + + /* check the render targets */ + for (i = 0; i < setup->fb.nr_cbufs; i++) { + if (setup->fb.cbufs[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + } + if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + return PIPE_UNREFERENCED; } -- cgit v1.2.3 From 018b78ad649e88cc6d8b6b10aef1502075508515 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 15:32:55 -0700 Subject: llvmpipe: check render targets before other textures --- src/gallium/drivers/llvmpipe/lp_setup.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ce006bf618e..649e97992ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -482,19 +482,22 @@ lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ) { unsigned i; - for (i = 0; i < 
Elements(setup->scenes); i++) { - if (lp_scene_is_textured_referenced(setup->scenes[i], texture)) { - return PIPE_REFERENCED_FOR_READ; - } - } /* check the render targets */ for (i = 0; i < setup->fb.nr_cbufs; i++) { if (setup->fb.cbufs[i]->texture == texture) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } - if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) + if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) { return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + } + + /* check textures referenced by the scene */ + for (i = 0; i < Elements(setup->scenes); i++) { + if (lp_scene_is_textured_referenced(setup->scenes[i], texture)) { + return PIPE_REFERENCED_FOR_READ; + } + } return PIPE_UNREFERENCED; } -- cgit v1.2.3 From f19f218e7aad76639a6aacabda8101ba87bb4896 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 18:01:45 -0700 Subject: llvmpipe: minor assorted clean-ups --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 0f5b25b725c..39ad983a356 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -36,6 +36,7 @@ #define NUM_CHANNELS 4 + /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ @@ -45,10 +46,11 @@ static void constant_coef( struct lp_rast_triangle *tri, unsigned i ) { tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0; - tri->inputs.dady[slot][i] = 0; + tri->inputs.dadx[slot][i] = 0.0f; + tri->inputs.dady[slot][i] = 0.0f; } + /** * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. 
@@ -184,8 +186,7 @@ static void setup_tri_coefficients( struct setup_context *setup, /* Allocate space for the a0, dadx and dady arrays */ { - unsigned bytes; - bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); + unsigned bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); tri->inputs.a0 = lp_scene_alloc_aligned( scene, bytes, 16 ); tri->inputs.dadx = lp_scene_alloc_aligned( scene, bytes, 16 ); tri->inputs.dady = lp_scene_alloc_aligned( scene, bytes, 16 ); @@ -281,7 +282,7 @@ do_triangle_ccw(struct setup_context *setup, * * XXX: subject to overflow?? */ - if (area <= 0) { + if (area <= 0.0f) { lp_scene_putback_data( scene, sizeof *tri ); return; } @@ -306,8 +307,7 @@ do_triangle_ccw(struct setup_context *setup, */ setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing ); - /* half-edge constants, will be interated over the whole - * rendertarget. + /* half-edge constants, will be interated over the whole render target. */ tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; tri->c2 = tri->dy23 * x2 - tri->dx23 * y2; @@ -494,6 +494,7 @@ do_triangle_ccw(struct setup_context *setup, } } + static void triangle_cw( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], @@ -502,6 +503,7 @@ static void triangle_cw( struct setup_context *setup, do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ); } + static void triangle_ccw( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], @@ -510,6 +512,7 @@ static void triangle_ccw( struct setup_context *setup, do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); } + static void triangle_both( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], @@ -522,12 +525,13 @@ static void triangle_both( struct setup_context *setup, const float fy = v1[0][1] - v2[0][1]; /* det = cross(e,f).z */ - if (ex * fy - ey * fx < 0) + if (ex * fy - ey * fx < 0.0f) triangle_ccw( setup, v0, v1, v2 ); else triangle_cw( setup, v0, v1, v2 ); } + static void 
triangle_nop( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], @@ -554,5 +558,3 @@ lp_setup_choose_triangle( struct setup_context *setup ) break; } } - - -- cgit v1.2.3 From f94a99170ecdc3286408b3628fbae9f45518007e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 18:54:48 -0700 Subject: llvmpipe: optimize lp_rast_clear_color() for non-gray colors This makes a big difference in progs that clear to a non-gray color. Some demos are 30-50% faster. --- src/gallium/drivers/llvmpipe/lp_rast.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6c7ece9fdbf..3849116758f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -187,17 +187,33 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { + /* clear to grayscale value {x, x, x, x} */ for (i = 0; i < rast->state.fb.nr_cbufs; i++) { memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } else { - unsigned x, y, chan; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) - for (y = 0; y < TILE_SIZE; y++) - for (x = 0; x < TILE_SIZE; x++) - for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(color_tile[i], x, y, chan) = clear_color[chan]; + /* Non-gray color. + * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code + * will need to change. It'll be pretty obvious when clearing no longer + * works. 
+ */ + const unsigned chunk = TILE_SIZE / 4; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + uint8_t *c = color_tile[i]; + unsigned j; + for (j = 0; j < 4 * TILE_SIZE; j++) { + memset(c, clear_color[0], chunk); + c += chunk; + memset(c, clear_color[1], chunk); + c += chunk; + memset(c, clear_color[2], chunk); + c += chunk; + memset(c, clear_color[3], chunk); + c += chunk; + } + assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4); + } } } -- cgit v1.2.3 From 2ba1c8189a124932b7b35115caf8f442bf4a7125 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Jan 2010 18:58:38 -0700 Subject: llvmpipe: use one loop in lp_rast_clear_zstencil() This is just a tiny bit faster. --- src/gallium/drivers/llvmpipe/lp_rast.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3849116758f..75562bf62dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -226,14 +226,13 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg) { - unsigned i, j; + unsigned i; uint32_t *depth_tile = rast->tasks[thread_index].tile.depth; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); - for (i = 0; i < TILE_SIZE; i++) - for (j = 0; j < TILE_SIZE; j++) - depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil; + for (i = 0; i < TILE_SIZE * TILE_SIZE; i++) + depth_tile[i] = arg.clear_zstencil; } -- cgit v1.2.3 From 4414a1a73ca649df12b514daa82381a2dbde2ba4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 14:19:16 -0700 Subject: llvmpipe: assorted clean-ups in texture code --- src/gallium/drivers/llvmpipe/lp_context.c | 1 - src/gallium/drivers/llvmpipe/lp_texture.c | 53 +++++++++++++++---------------- src/gallium/drivers/llvmpipe/lp_texture.h | 8 ++--- 3 files changed, 29 insertions(+), 33 deletions(-) (limited to 'src') diff --git 
a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 8d965175f8c..bd549d4028b 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -166,7 +166,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.is_buffer_referenced = llvmpipe_is_buffer_referenced; llvmpipe_init_query_funcs( llvmpipe ); - llvmpipe_init_texture_funcs( llvmpipe ); /* * Create drawing context and plug our rendering stage into it. diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index a953e8845a0..14f636e4aeb 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -46,21 +46,19 @@ #include "lp_winsys.h" -/* Simple, maximally packed layout. - */ - -/* Conventional allocation path for non-display textures: +/** + * Conventional allocation path for non-display textures: + * Simple, maximally packed layout. */ static boolean llvmpipe_texture_layout(struct llvmpipe_screen *screen, - struct llvmpipe_texture * lpt) + struct llvmpipe_texture *lpt) { struct pipe_texture *pt = &lpt->base; unsigned level; unsigned width = pt->width0; unsigned height = pt->height0; unsigned depth = pt->depth0; - unsigned buffer_size = 0; for (level = 0; level <= pt->last_level; level++) { @@ -79,7 +77,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, ((pt->target == PIPE_TEXTURE_CUBE) ? 
6 : depth) * lpt->stride[level]); - width = u_minify(width, 1); + width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); } @@ -89,9 +87,11 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, return lpt->data != NULL; } + + static boolean llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, - struct llvmpipe_texture * lpt) + struct llvmpipe_texture *lpt) { struct llvmpipe_winsys *winsys = screen->winsys; @@ -106,9 +106,6 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, } - - - static struct pipe_texture * llvmpipe_texture_create(struct pipe_screen *_screen, const struct pipe_texture *templat) @@ -125,7 +122,7 @@ llvmpipe_texture_create(struct pipe_screen *_screen, /* XXX: The xlib state tracker is brain-dead and will request * PIPE_FORMAT_Z16_UNORM no matter how much we tell it we don't support it. */ - if(lpt->base.format == PIPE_FORMAT_Z16_UNORM) + if (lpt->base.format == PIPE_FORMAT_Z16_UNORM) lpt->base.format = PIPE_FORMAT_Z32_UNORM; if (lpt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | @@ -189,12 +186,15 @@ llvmpipe_texture_destroy(struct pipe_texture *pt) struct llvmpipe_screen *screen = llvmpipe_screen(pt->screen); struct llvmpipe_texture *lpt = llvmpipe_texture(pt); - if(lpt->dt) { + if (lpt->dt) { + /* display target */ struct llvmpipe_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, lpt->dt); } - else + else { + /* regular texture */ align_free(lpt->data); + } FREE(lpt); } @@ -356,7 +356,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, lpt = llvmpipe_texture(transfer->texture); format = lpt->base.format; - if(lpt->dt) { + if (lpt->dt) { + /* display target */ struct llvmpipe_winsys *winsys = screen->winsys; map = winsys->displaytarget_map(winsys, lpt->dt, @@ -364,14 +365,15 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, if (map == NULL) return NULL; } - else + else { + /* regular texture */ map = lpt->data; + } /* May want to different things here 
depending on read/write nature * of the map: */ - if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) - { + if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. */ screen->timestamp++; @@ -386,28 +388,23 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, static void -llvmpipe_transfer_unmap(struct pipe_screen *_screen, +llvmpipe_transfer_unmap(struct pipe_screen *screen, struct pipe_transfer *transfer) { - struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + struct llvmpipe_screen *lp_screen = llvmpipe_screen(screen); struct llvmpipe_texture *lpt; assert(transfer->texture); lpt = llvmpipe_texture(transfer->texture); - if(lpt->dt) { - struct llvmpipe_winsys *winsys = screen->winsys; + if (lpt->dt) { + /* display target */ + struct llvmpipe_winsys *winsys = lp_screen->winsys; winsys->displaytarget_unmap(winsys, lpt->dt); } } -void -llvmpipe_init_texture_funcs(struct llvmpipe_context *lp) -{ -} - - void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index e37ef6059a2..87c905bc027 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -37,6 +37,7 @@ struct pipe_screen; struct llvmpipe_context; struct llvmpipe_displaytarget; + struct llvmpipe_texture { struct pipe_texture base; @@ -58,6 +59,7 @@ struct llvmpipe_texture unsigned timestamp; }; + struct llvmpipe_transfer { struct pipe_transfer base; @@ -73,6 +75,7 @@ llvmpipe_texture(struct pipe_texture *pt) return (struct llvmpipe_texture *) pt; } + static INLINE const struct llvmpipe_texture * llvmpipe_texture_const(const struct pipe_texture *pt) { @@ -87,11 +90,8 @@ llvmpipe_transfer(struct pipe_transfer *pt) } -extern void -llvmpipe_init_texture_funcs( struct llvmpipe_context *llvmpipe ); - extern void llvmpipe_init_screen_texture_funcs(struct 
pipe_screen *screen); -#endif /* LP_TEXTURE */ +#endif /* LP_TEXTURE_H */ -- cgit v1.2.3 From dfd6e762e70aef6694fa3baedf8d423b08995233 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 16:21:49 -0700 Subject: llvmpipe: remove redundant code in llvmpipe_set_blend_color() --- src/gallium/drivers/llvmpipe/lp_state_blend.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index a10c5918df3..9b950e82d89 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -80,9 +80,6 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) return; - if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) - return; - draw_flush(llvmpipe->draw); memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); -- cgit v1.2.3 From 4f5675e94b936d012b89937aac8a16c28143d5ec Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 19:04:40 -0700 Subject: gallium/util: added debug_dump_texture() and ppm output Now we can dump debug images on Linux too. --- src/gallium/auxiliary/util/u_debug.c | 75 ++++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_debug.h | 4 ++ 2 files changed, 79 insertions(+) (limited to 'src') diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 9b4e6ca2a73..7ee0864d292 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -631,6 +631,14 @@ const char *u_prim_name( unsigned prim ) #ifdef DEBUG +/** + * Dump an image to a .raw or .ppm file (depends on OS).
+ * \param format PIPE_FORMAT_x + * \param cpp bytes per pixel + * \param width width in pixels + * \param height height in pixels + * \param stride row stride in bytes + */ void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, unsigned width, unsigned height, @@ -672,6 +680,52 @@ void debug_dump_image(const char *prefix, } EngUnmapFile(iFile); +#elif defined(PIPE_OS_UNIX) + /* write a ppm file */ + char filename[256]; + FILE *f; + + util_snprintf(filename, sizeof(filename), "%s.ppm", prefix); + + f = fopen(filename, "w"); + if (f) { + int i, x, y; + int r, g, b; + const uint8_t *ptr = (uint8_t *) data; + + /* XXX this is a hack */ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + r = 2; + g = 1; + b = 0; + break; + default: + r = 0; + g = 1; + b = 1; + } + + fprintf(f, "P6\n"); + fprintf(f, "# ppm-file created by osdemo.c\n"); + fprintf(f, "%i %i\n", width, height); + fprintf(f, "255\n"); + fclose(f); + + f = fopen(filename, "ab"); /* reopen in binary append mode */ + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + i = y * stride + x * cpp; + fputc(ptr[i + r], f); /* write red */ + fputc(ptr[i + g], f); /* write green */ + fputc(ptr[i + b], f); /* write blue */ + } + } + fclose(f); + } + else { + fprintf(stderr, "Can't open %s for writing\n", filename); + } #endif } @@ -712,6 +766,27 @@ error: } +void debug_dump_texture(const char *prefix, + struct pipe_texture *texture) +{ + struct pipe_surface *surface; + struct pipe_screen *screen; + + if (!texture) + return; + + screen = texture->screen; + + /* XXX for now, just dump image for face=0, level=0 */ + surface = screen->get_tex_surface(screen, texture, 0, 0, 0, + PIPE_TEXTURE_USAGE_SAMPLER); + if (surface) { + debug_dump_surface(prefix, surface); + screen->tex_surface_destroy(surface); + } +} + + #pragma pack(push,2) struct bmp_file_header { uint16_t bfType; diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 
facc30a5534..131c9915391 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -354,6 +354,8 @@ debug_memory_end(unsigned long beginning); #ifdef DEBUG struct pipe_surface; struct pipe_transfer; +struct pipe_texture; + void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, unsigned width, unsigned height, @@ -361,6 +363,8 @@ void debug_dump_image(const char *prefix, const void *data); void debug_dump_surface(const char *prefix, struct pipe_surface *surface); +void debug_dump_texture(const char *prefix, + struct pipe_texture *texture); void debug_dump_surface_bmp(const char *filename, struct pipe_surface *surface); void debug_dump_transfer_bmp(const char *filename, -- cgit v1.2.3 From 16ecd2f0780c7b9123aa50d5a174c127a1e408b2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 19:05:55 -0700 Subject: llvmpipe: clean-up, fixing up frame dump/debug code --- src/gallium/drivers/llvmpipe/lp_flush.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 16fb00092e6..07f32848c7f 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -74,16 +74,21 @@ llvmpipe_flush( struct pipe_context *pipe, /* Enable to dump BMPs of the color/depth buffers each frame */ #if 0 - if(flags & PIPE_FLUSH_FRAME) { + if (flags & PIPE_FLUSH_FRAME) { static unsigned frame_no = 1; - static char filename[256]; + char filename[256]; unsigned i; - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs) { - util_snprintf(filename, sizeof(filename), "cbuf%u_%u.bmp", i, frame_no); - debug_dump_surface_bmp(filename, llvmpipe->framebuffer.cbufs[i]); + + for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); + debug_dump_surface(filename, llvmpipe->framebuffer.cbufs[i]); + } + + if (0) { + 
util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); + debug_dump_surface(filename, llvmpipe->framebuffer.zsbuf); } - util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, llvmpipe->framebuffer.zsbuf); + ++frame_no; } #endif -- cgit v1.2.3 From ca12e30d97b83fb33e1f8f83da05b5ed2809b0af Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 19:08:19 -0700 Subject: llvmpipe: minor comment update --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 39ad983a356..716b88073e5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -131,7 +131,7 @@ static void perspective_coef( struct lp_rast_triangle *tri, /** * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. + * X and Y are trivial * Z and W are copied from position_coef which should have already been computed. * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ -- cgit v1.2.3 From 4461442849bfdb817334b38567136f7f9dabdf59 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Jan 2010 19:15:00 -0700 Subject: llvmpipe: implement scissor testing The scissor test is implemented as another per-quad operation in the JIT code. The four scissor box params are passed via the lp_jit_context. In the JIT code we compare the quad's x/y coords against the clip bounds and create a new in/out mask that's AND'd with the main quad mask. Note: we should also do scissor testing in the triangle setup code to improve efficiency. That's not done yet. 
--- src/gallium/drivers/llvmpipe/lp_jit.c | 21 +++++++--- src/gallium/drivers/llvmpipe/lp_jit.h | 19 ++++++++- src/gallium/drivers/llvmpipe/lp_setup.c | 34 ++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup.h | 4 ++ src/gallium/drivers/llvmpipe/lp_setup_context.h | 6 +++ src/gallium/drivers/llvmpipe/lp_state.h | 1 + src/gallium/drivers/llvmpipe/lp_state_derived.c | 4 ++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 52 +++++++++++++++++++++++++ 8 files changed, 134 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 4ef0783f3e2..429cb973c26 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -79,13 +79,16 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_context */ { - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[8]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ - elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ - elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ - elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ elem_types[2] = LLVMFloatType(); /* scissor_xmin */ + elem_types[3] = LLVMFloatType(); /* scissor_ymin */ + elem_types[4] = LLVMFloatType(); /* scissor_xmax */ + elem_types[5] = LLVMFloatType(); /* scissor_ymax */ + elem_types[6] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[7] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -93,8 +96,16 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->target, context_type, 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, screen->target, context_type, 1); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, + LP_CHECK_MEMBER_OFFSET(struct 
lp_jit_context, scissor_xmin, screen->target, context_type, 2); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin, + screen->target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax, + screen->target, context_type, 4); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax, + screen->target, context_type, 5); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, + screen->target, context_type, 6); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, screen->target, context_type, LP_JIT_CONTEXT_TEXTURES_INDEX); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 3b316914b02..9cbe1bd3b1b 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -79,6 +79,9 @@ struct lp_jit_context float alpha_ref_value; + /** floats, not ints */ + float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax; + /* FIXME: store (also?) in floats */ uint8_t *blend_color; @@ -92,10 +95,22 @@ struct lp_jit_context #define lp_jit_context_alpha_ref_value(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value") +#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 2, "scissor_xmin") + +#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 3, "scissor_ymin") + +#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 4, "scissor_xmax") + +#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 5, "scissor_ymax") + #define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "blend_color") + lp_build_struct_get(_builder, _ptr, 6, "blend_color") -#define LP_JIT_CONTEXT_TEXTURES_INDEX 3 +#define LP_JIT_CONTEXT_TEXTURES_INDEX 7 #define lp_jit_context_textures(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, 
LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 649e97992ba..284337e8252 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -413,6 +413,21 @@ lp_setup_set_blend_color( struct setup_context *setup, } +void +lp_setup_set_scissor( struct setup_context *setup, + const struct pipe_scissor_state *scissor ) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(scissor); + + if (memcmp(&setup->scissor.current, scissor, sizeof(*scissor)) != 0) { + setup->scissor.current = *scissor; /* struct copy */ + setup->dirty |= LP_SETUP_NEW_SCISSOR; + } +} + + void lp_setup_set_flatshade_first( struct setup_context *setup, boolean flatshade_first ) @@ -534,6 +549,25 @@ lp_setup_update_state( struct setup_context *setup ) setup->dirty |= LP_SETUP_NEW_FS; } + if (setup->dirty & LP_SETUP_NEW_SCISSOR) { + float *stored; + + stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16); + + stored[0] = (float) setup->scissor.current.minx; + stored[1] = (float) setup->scissor.current.miny; + stored[2] = (float) setup->scissor.current.maxx; + stored[3] = (float) setup->scissor.current.maxy; + + setup->scissor.stored = stored; + + setup->fs.current.jit_context.scissor_xmin = stored[0]; + setup->fs.current.jit_context.scissor_ymin = stored[1]; + setup->fs.current.jit_context.scissor_xmax = stored[2]; + setup->fs.current.jit_context.scissor_ymax = stored[3]; + + setup->dirty |= LP_SETUP_NEW_FS; + } if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { struct pipe_buffer *buffer = setup->constants.current; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 429abeba43b..c7ef3d394a4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -113,6 +113,10 @@ void lp_setup_set_blend_color( struct setup_context *setup, const struct pipe_blend_color *blend_color ); 
+void +lp_setup_set_scissor( struct setup_context *setup, + const struct pipe_scissor_state *scissor ); + void lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index e6f6f0e0bbb..fc0aef1376c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -45,6 +45,7 @@ #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 #define LP_SETUP_NEW_BLEND_COLOR 0x04 +#define LP_SETUP_NEW_SCISSOR 0x08 struct lp_scene_queue; @@ -122,6 +123,11 @@ struct setup_context uint8_t *stored; } blend_color; + struct { + struct pipe_scissor_state current; + const void *stored; + } scissor; + unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ void (*point)( struct setup_context *, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 4c6747bb2b6..ddb152c0740 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -72,6 +72,7 @@ struct lp_fragment_shader_variant_key enum pipe_format zsbuf_format; unsigned nr_cbufs:8; unsigned flatshade:1; + unsigned scissor:1; struct { ubyte colormask; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 2c349fdb1d1..28af477914c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -160,6 +160,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | + LP_NEW_SCISSOR | LP_NEW_DEPTH_STENCIL_ALPHA | LP_NEW_RASTERIZER | LP_NEW_SAMPLER | @@ -170,6 +171,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); + if (llvmpipe->dirty & LP_NEW_SCISSOR) + 
lp_setup_set_scissor(llvmpipe->setup, &llvmpipe->scissor); + if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) lp_setup_set_alpha_ref_value(llvmpipe->setup, llvmpipe->depth_stencil->alpha.ref_value); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 26a2d6cc239..d12d3f6091a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -304,6 +304,51 @@ generate_tri_edge_mask(LLVMBuilderRef builder, } +static LLVMValueRef +generate_scissor_test(LLVMBuilderRef builder, + LLVMValueRef context_ptr, + const struct lp_build_interp_soa_context *interp, + struct lp_type type) +{ + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1]; + LLVMValueRef xmin, ymin, xmax, ymax; + LLVMValueRef m0, m1, m2, m3, m; + + /* xpos, ypos contain the window coords for the four pixels in the quad */ + assert(xpos); + assert(ypos); + + /* get the current scissor bounds, convert to vectors */ + xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr); + xmin = lp_build_broadcast(builder, vec_type, xmin); + + ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr); + ymin = lp_build_broadcast(builder, vec_type, ymin); + + xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr); + xmax = lp_build_broadcast(builder, vec_type, xmax); + + ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr); + ymax = lp_build_broadcast(builder, vec_type, ymax); + + /* compare the fragment's position coordinates against the scissor bounds */ + m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin); + m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin); + m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax); + m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax); + + /* AND all the masks together */ + m = LLVMBuildAnd(builder, m0, m1, ""); + m = LLVMBuildAnd(builder, m, m2, ""); + 
m = LLVMBuildAnd(builder, m, m3, ""); + + lp_build_name(m, "scissormask"); + + return m; +} + + /** * Generate the fragment shader, depth/stencil test, and alpha tests. * \param i which quad in the tile, in range [0,3] @@ -372,6 +417,11 @@ generate_fs(struct llvmpipe_context *lp, /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); + if (key->scissor) { + LLVMValueRef smask = + generate_scissor_test(builder, context_ptr, interp, type); + lp_build_mask_update(&mask, smask); + } early_depth_test = key->depth.enabled && @@ -968,6 +1018,7 @@ make_variant_key(struct llvmpipe_context *lp, /* alpha.ref_value is passed in jit_context */ key->flatshade = lp->rasterizer->flatshade; + key->scissor = lp->rasterizer->scissor; if (lp->framebuffer.nr_cbufs) { memcpy(&key->blend, lp->blend, sizeof key->blend); @@ -1033,6 +1084,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) key.blend.colormask == 0xf && !key.alpha.enabled && !key.depth.enabled && + !key.scissor && !shader->info.uses_kill ? TRUE : FALSE; -- cgit v1.2.3 From f97eeeec6b9f8a979f9452e2dfdab86ccb058b64 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 09:16:26 -0700 Subject: llvmpipe: add extra flags to DEFINES, no CFLAGS Doesn't make any real difference but -D flags are put into DEFINES everywhere else. 
--- src/gallium/drivers/llvmpipe/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 264999a7cea..3d8d88179a9 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -3,7 +3,7 @@ include $(TOP)/configs/current LIBNAME = llvmpipe -CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ lp_scene.c \ -- cgit v1.2.3 From 345178a7fc793c7789750de9f5e6d89f8e03af7c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 10:04:16 -0700 Subject: llvmpipe: put labels on some value refs --- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 5ee8d556a68..854dd0b28c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -172,7 +172,7 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: /* FIXME */ - _debug_printf("warning: failed to translate texture wrap mode %s\n", + _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n", debug_dump_tex_wrap(wrap_mode, TRUE)); coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); @@ -201,9 +201,13 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, x = lp_build_ifloor(&bld->coord_bld, s); y = lp_build_ifloor(&bld->coord_bld, t); + lp_build_name(x, "tex.x.floor"); + lp_build_name(y, "tex.y.floor"); x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); y = lp_build_sample_wrap(bld, y, height, 
bld->static_state->pot_height, bld->static_state->wrap_t); + lp_build_name(x, "tex.x.wrapped"); + lp_build_name(y, "tex.y.wrapped"); lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); } -- cgit v1.2.3 From 3b1920a34903dfb753bc2a0461fef204d39846c6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 10:25:59 -0700 Subject: llvmpipe: asst code changes in lp_state_fs.c --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 59 ++++++++++++++++-------------- 1 file changed, 32 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index d12d3f6091a..f15fca293bc 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -596,6 +596,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; + LLVMValueRef function; unsigned num_fs; unsigned i; unsigned chan; @@ -652,30 +653,33 @@ generate_fragment(struct llvmpipe_context *lp, func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - variant->function = LLVMAddFunction(screen->module, "shader", func_type); - LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + function = LLVMAddFunction(screen->module, "shader", func_type); + LLVMSetFunctionCallConv(function, LLVMCCallConv); + + variant->function = function; + /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? 
*/ for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); - - context_ptr = LLVMGetParam(variant->function, 0); - x = LLVMGetParam(variant->function, 1); - y = LLVMGetParam(variant->function, 2); - a0_ptr = LLVMGetParam(variant->function, 3); - dadx_ptr = LLVMGetParam(variant->function, 4); - dady_ptr = LLVMGetParam(variant->function, 5); - color_ptr_ptr = LLVMGetParam(variant->function, 6); - depth_ptr = LLVMGetParam(variant->function, 7); - c0 = LLVMGetParam(variant->function, 8); - c1 = LLVMGetParam(variant->function, 9); - c2 = LLVMGetParam(variant->function, 10); - step0_ptr = LLVMGetParam(variant->function, 11); - step1_ptr = LLVMGetParam(variant->function, 12); - step2_ptr = LLVMGetParam(variant->function, 13); + LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(function, 0); + x = LLVMGetParam(function, 1); + y = LLVMGetParam(function, 2); + a0_ptr = LLVMGetParam(function, 3); + dadx_ptr = LLVMGetParam(function, 4); + dady_ptr = LLVMGetParam(function, 5); + color_ptr_ptr = LLVMGetParam(function, 6); + depth_ptr = LLVMGetParam(function, 7); + c0 = LLVMGetParam(function, 8); + c1 = LLVMGetParam(function, 9); + c2 = LLVMGetParam(function, 10); + step0_ptr = LLVMGetParam(function, 11); + step1_ptr = LLVMGetParam(function, 12); + step2_ptr = LLVMGetParam(function, 13); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -696,7 +700,7 @@ generate_fragment(struct llvmpipe_context *lp, * Function body */ - block = LLVMAppendBasicBlock(variant->function, "entry"); + block = LLVMAppendBasicBlock(function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); @@ -788,33 +792,30 @@ generate_fragment(struct llvmpipe_context *lp, /* Verify the LLVM IR. 
If invalid, dump and abort */ #ifdef DEBUG - if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { if (1) - LLVMDumpValue(variant->function); + LLVMDumpValue(function); abort(); } #endif /* Apply optimizations to LLVM IR */ if (1) - LLVMRunFunctionPassManager(screen->pass, variant->function); + LLVMRunFunctionPassManager(screen->pass, function); if (LP_DEBUG & DEBUG_JIT) { /* Print the LLVM IR to stderr */ - LLVMDumpValue(variant->function); + LLVMDumpValue(function); debug_printf("\n"); } /* * Translate the LLVM IR into machine code. */ - variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); + variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); if (LP_DEBUG & DEBUG_ASM) lp_disassemble(variant->jit_function); - - variant->next = shader->variants; - shader->variants = variant; } @@ -888,6 +889,10 @@ generate_variant(struct llvmpipe_context *lp, generate_fragment(lp, shader, variant); + /* insert new variant into linked list */ + variant->next = shader->variants; + shader->variants = variant; + return variant; } -- cgit v1.2.3 From 2797f2bf57562c95a601a67edca3089641215cc4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:21:16 -0700 Subject: llvmpipe: generate two shader variants, one omits triangle in/out testing When we know that a 4x4 pixel block is entirely inside of a triangle use the jit function which omits the in/out test code. Results in a few percent speedup in many tests. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 52 +++++++++++++++---------- src/gallium/drivers/llvmpipe/lp_rast.h | 6 ++- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 43 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 11 ++---- src/gallium/drivers/llvmpipe/lp_setup.c | 12 +++--- src/gallium/drivers/llvmpipe/lp_setup.h | 7 ++-- src/gallium/drivers/llvmpipe/lp_state.h | 4 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 59 ++++++++++++++++++++++------- 8 files changed, 142 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 75562bf62dc..d03ba1752d6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -344,9 +344,6 @@ void lp_rast_set_state( struct lp_rasterizer *rast, -/* Within a tile: - */ - /** * Run the shader on all blocks in a tile. This is used when a tile is * completely contained inside a triangle. @@ -356,8 +353,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { - /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN; + const struct lp_rast_state *state = rast->tasks[thread_index].current_state; + struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = rast->tasks[thread_index].x; const unsigned tile_y = rast->tasks[thread_index].y; @@ -365,16 +362,35 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - /* Use the existing preference for 4x4 (four quads) shading: - */ - for (y = 0; y < TILE_SIZE; y += 4) - for (x = 0; x < TILE_SIZE; x += 4) - lp_rast_shade_quads( rast, - thread_index, - inputs, - tile_x + x, - tile_y + y, - c1, c2, c3); + /* render the whole 64x64 tile in 4x4 chunks */ + for (y = 0; y < TILE_SIZE; y += 4){ + for 
(x = 0; x < TILE_SIZE; x += 4) { + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + uint32_t *depth; + unsigned block_offset, i; + + /* offset of the 16x16 pixel block within the tile */ + block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16); + + /* color buffer */ + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + color[i] = tile->color[i] + 4 * block_offset; + + /* depth buffer */ + depth = tile->depth + block_offset; + + /* run shader */ + state->jit_function[0]( &state->jit_context, + tile_x + x, tile_y + y, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); + } + } } @@ -411,7 +427,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, iy = y % TILE_SIZE; /* offset of the 16x16 pixel block within the tile */ - block_offset = ((iy/4)*(16*16) + (ix/4)*16); + block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16); /* color buffer */ for (i = 0; i < rast->state.fb.nr_cbufs; i++) @@ -433,7 +449,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, #endif /* run shader */ - state->jit_function( &state->jit_context, + state->jit_function[1]( &state->jit_context, x, y, inputs->a0, inputs->dadx, @@ -445,8 +461,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, } -/* End of tile: - */ /** diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index d926adb6b22..2a97fe4c67b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -66,8 +66,10 @@ struct lp_rast_state { /* The shader itself. 
Probably we also need to pass a pointer to * the tile color/z/stencil data somehow: - */ - lp_jit_frag_func jit_function; + * jit_function[0] skips the triangle in/out test code + * jit_function[1] does triangle in/out testing + */ + lp_jit_frag_func jit_function[2]; boolean opaque; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 5afdeab049c..607968e3459 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -30,6 +30,7 @@ #include "pipe/p_thread.h" #include "lp_rast.h" +#include "lp_tile_soa.h" #define MAX_THREADS 8 /* XXX probably temporary here */ @@ -126,4 +127,46 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3); + +/** + * Shade all pixels in a 4x4 block. The fragment code omits the + * triangle in/out tests. + * \param x, y location of 4x4 block in window coords + */ +static INLINE void +lp_rast_shade_quads_all( struct lp_rasterizer *rast, + unsigned thread_index, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y ) +{ + const struct lp_rast_state *state = rast->tasks[thread_index].current_state; + struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + void *depth; + unsigned block_offset, i; + + /* offset of the containing 16x16 pixel block within the tile */ + block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16; + + /* color buffer */ + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + color[i] = tile->color[i] + 4 * block_offset; + + /* depth buffer */ + depth = tile->depth + block_offset; + + /* run shader */ + state->jit_function[0]( &state->jit_context, + x, y, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); +} + + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c 
b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index bc7397f50c5..9c3f699ec71 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -89,13 +89,10 @@ block_full_4( struct lp_rasterizer_task *rast_task, const struct lp_rast_triangle *tri, int x, int y ) { - /* Set c1,c2,c3 to large values so the in/out test always passes */ - const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN; - lp_rast_shade_quads(rast_task->rast, - rast_task->thread_index, - &tri->inputs, - x, y, - c1, c2, c3); + lp_rast_shade_quads_all(rast_task->rast, + rast_task->thread_index, + &tri->inputs, + x, y); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 284337e8252..355c0518372 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -362,14 +362,16 @@ lp_setup_set_fs_inputs( struct setup_context *setup, } void -lp_setup_set_fs_function( struct setup_context *setup, - lp_jit_frag_func jit_function, - boolean opaque ) +lp_setup_set_fs_functions( struct setup_context *setup, + lp_jit_frag_func jit_function0, + lp_jit_frag_func jit_function1, + boolean opaque ) { - LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function); + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function0); /* FIXME: reference count */ - setup->fs.current.jit_function = jit_function; + setup->fs.current.jit_function[0] = jit_function0; + setup->fs.current.jit_function[1] = jit_function1; setup->fs.current.opaque = opaque; setup->dirty |= LP_SETUP_NEW_FS; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index c7ef3d394a4..407f7527770 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -96,9 +96,10 @@ lp_setup_set_fs_inputs( struct setup_context *setup, unsigned nr ); void -lp_setup_set_fs_function( struct setup_context *setup, - lp_jit_frag_func jit_function, 
- boolean opaque ); +lp_setup_set_fs_functions( struct setup_context *setup, + lp_jit_frag_func jit_function0, + lp_jit_frag_func jit_function1, + boolean opaque ); void lp_setup_set_fs_constants(struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index ddb152c0740..224b6e523c3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -88,9 +88,9 @@ struct lp_fragment_shader_variant struct lp_fragment_shader_variant_key key; - LLVMValueRef function; + LLVMValueRef function[2]; - lp_jit_frag_func jit_function; + lp_jit_frag_func jit_function[2]; struct lp_fragment_shader_variant *next; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index f15fca293bc..a8f4a4ed463 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -349,9 +349,26 @@ generate_scissor_test(LLVMBuilderRef builder, } +static LLVMValueRef +build_int32_vec_const(int value) +{ + struct lp_type i32_type; + + memset(&i32_type, 0, sizeof i32_type); + i32_type.floating = FALSE; /* values are integers */ + i32_type.sign = TRUE; /* values are signed */ + i32_type.norm = FALSE; /* values are not normalized */ + i32_type.width = 32; /* 32-bit int values */ + i32_type.length = 4; /* 4 elements per vector */ + return lp_build_int_const_scalar(i32_type, value); +} + + + /** * Generate the fragment shader, depth/stencil test, and alpha tests. 
* \param i which quad in the tile, in range [0,3] + * \param do_tri_test if 1, do triangle edge in/out testing */ static void generate_fs(struct llvmpipe_context *lp, @@ -366,6 +383,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef *pmask, LLVMValueRef (*color)[4], LLVMValueRef depth_ptr, + unsigned do_tri_test, LLVMValueRef c0, LLVMValueRef c1, LLVMValueRef c2, @@ -411,8 +429,13 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ - generate_tri_edge_mask(builder, i, pmask, - c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + if (do_tri_test) { + generate_tri_edge_mask(builder, i, pmask, + c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + } + else { + *pmask = build_int32_vec_const(~0); + } /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); @@ -563,7 +586,8 @@ generate_blend(const struct pipe_blend_state *blend, static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, - struct lp_fragment_shader_variant *variant) + struct lp_fragment_shader_variant *variant, + unsigned do_tri_test) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; @@ -656,7 +680,7 @@ generate_fragment(struct llvmpipe_context *lp, function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); - variant->function = function; + variant->function[do_tri_test] = function; /* XXX: need to propagate noalias down into color param now we are @@ -738,6 +762,7 @@ generate_fragment(struct llvmpipe_context *lp, &fs_mask[i], /* output */ out_color, depth_ptr_i, + do_tri_test, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); @@ -812,10 +837,10 @@ generate_fragment(struct llvmpipe_context *lp, /* * Translate the LLVM IR into machine code. 
*/ - variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); + variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); if (LP_DEBUG & DEBUG_ASM) - lp_disassemble(variant->jit_function); + lp_disassemble(variant->jit_function[do_tri_test]); } @@ -887,7 +912,8 @@ generate_variant(struct llvmpipe_context *lp, variant->shader = shader; memcpy(&variant->key, key, sizeof *key); - generate_fragment(lp, shader, variant); + generate_fragment(lp, shader, variant, 0); + generate_fragment(lp, shader, variant, 1); /* insert new variant into linked list */ variant->next = shader->variants; @@ -947,11 +973,15 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) variant = shader->variants; while(variant) { struct lp_fragment_shader_variant *next = variant->next; + unsigned i; - if(variant->function) { - if(variant->jit_function) - LLVMFreeMachineCodeForFunction(screen->engine, variant->function); - LLVMDeleteFunction(variant->function); + for (i = 0; i < Elements(variant->function); i++) { + if (variant->function[i]) { + if (variant->jit_function[i]) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function[i]); + LLVMDeleteFunction(variant->function[i]); + } } FREE(variant); @@ -1093,7 +1123,8 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) !shader->info.uses_kill ? 
TRUE : FALSE; - lp_setup_set_fs_function(lp->setup, - shader->current->jit_function, - opaque); + lp_setup_set_fs_functions(lp->setup, + shader->current->jit_function[0], + shader->current->jit_function[1], + opaque); } -- cgit v1.2.3 From 4bef3575e605d890d9f228391b4724d27b025f49 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:25:13 -0700 Subject: llvmpipe: change 'in' to boolean, add comments --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 716b88073e5..e7de6431d38 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -441,7 +441,7 @@ do_triangle_ccw(struct setup_context *setup, int cx1 = c1; int cx2 = c2; int cx3 = c3; - int in = 0; + boolean in = FALSE; /* are we inside the triangle? */ for (x = minx; x <= maxx; x++) { @@ -451,13 +451,13 @@ do_triangle_ccw(struct setup_context *setup, { /* do nothing */ if (in) - break; + break; /* exiting triangle, all done with this row */ } else if (cx1 + ei1 > 0 && cx2 + ei2 > 0 && cx3 + ei3 > 0) { - in = 1; + in = TRUE; /* triangle covers the whole tile- shade whole tile */ if(setup->fs.current.opaque) { lp_scene_bin_reset( scene, x, y ); @@ -471,7 +471,7 @@ do_triangle_ccw(struct setup_context *setup, } else { - in = 1; + in = TRUE; /* shade partial tile */ lp_scene_bin_command( scene, x, y, lp_rast_triangle, -- cgit v1.2.3 From 12ba9e99db51a4a9e2e28a0574ef59f6548d8a84 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:26:35 -0700 Subject: llvmpipe: added comment about lookup-tables vs. 
computation --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 9c3f699ec71..92769beee1f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -40,6 +40,9 @@ * Map an index in [0,15] to an x,y position, multiplied by 4. * This is used to get the position of each subtile in a 4x4 * grid of edge step values. + * Note: we can use some bit twiddling to compute these values instead + * of using a look-up table, but there's no measurable performance + * difference. */ static const int pos_table4[16][2] = { { 0, 0 }, -- cgit v1.2.3 From 47fee146879aa8ac7f216c8ac5f3a84270266287 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:35:28 -0700 Subject: llvmpipe: skip 4x4 in/out test code It's a little faster to just do the in/out testing in the shader jit code. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 92769beee1f..b3d1e7dee45 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -149,10 +149,6 @@ do_block_16( struct lp_rasterizer_task *rast_task, int c2, int c3 ) { - const int ei1 = tri->ei1 * 4; - const int ei2 = tri->ei2 * 4; - const int ei3 = tri->ei3 * 4; - const int eo1 = tri->eo1 * 4; const int eo2 = tri->eo2 * 4; const int eo3 = tri->eo3 * 4; @@ -175,16 +171,10 @@ do_block_16( struct lp_rasterizer_task *rast_task, else { int px = x + pos_table4[i][0]; int py = y + pos_table4[i][1]; - if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) { - /* the block is completely inside the triangle */ - block_full_4(rast_task, tri, px, py); - } - else { - /* the block is partially in/out of the
triangle */ - do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); - } + /* Don't bother testing if the 4x4 block is entirely in/out of + * the triangle. It's a little faster to do it in the jit code. + */ + do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); } } } -- cgit v1.2.3 From adb48d535082f5a311751e1866997e381b2d3038 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 11:52:13 -0700 Subject: llvmpipe: remove lp_rast_triangle::min/max fields These values aren't needed outside the do_triangle_ccw() function. --- src/gallium/drivers/llvmpipe/lp_rast.h | 6 ------ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 2a97fe4c67b..21ebfa7ca91 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -97,12 +97,6 @@ struct lp_rast_shader_inputs { * Objects of this type are put into the setup_context::data buffer. 
*/ struct lp_rast_triangle { - /* bounding box of tri (in pixels) */ - int minx; - int maxx; - int miny; - int maxy; - /* one-pixel sized trivial accept offsets for each plane */ int ei1; int ei2; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index e7de6431d38..ae354b3870e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -288,13 +288,13 @@ do_triangle_ccw(struct setup_context *setup, } /* Bounding rectangle (in pixels) */ - tri->minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - tri->maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - tri->miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - tri->maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; + minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; + maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; + miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; + maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - if (tri->miny == tri->maxy || - tri->minx == tri->maxx) { + if (miny == maxy || + minx == maxx) { lp_scene_putback_data( scene, sizeof *tri ); return; } @@ -384,10 +384,10 @@ do_triangle_ccw(struct setup_context *setup, /* Convert to tile coordinates: */ - minx = tri->minx / TILE_SIZE; - miny = tri->miny / TILE_SIZE; - maxx = tri->maxx / TILE_SIZE; - maxy = tri->maxy / TILE_SIZE; + minx = minx / TILE_SIZE; + miny = miny / TILE_SIZE; + maxx = maxx / TILE_SIZE; + maxy = maxy / TILE_SIZE; /* Clamp maxx, maxy to framebuffer size */ -- cgit v1.2.3 From fdfe06ad804ea13e6e436d66c1bcafe0bde2f545 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 15 Jan 2010 12:06:00 -0700 Subject: llvmpipe: implement scissor test in triangle setup --- src/gallium/drivers/llvmpipe/lp_setup.c | 4 +++- src/gallium/drivers/llvmpipe/lp_setup.h | 3 ++- src/gallium/drivers/llvmpipe/lp_setup_context.h | 1 + src/gallium/drivers/llvmpipe/lp_setup_tri.c | 7 +++++++ 
src/gallium/drivers/llvmpipe/lp_state_rasterizer.c | 3 ++- 5 files changed, 15 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 355c0518372..f52dce65d74 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -339,13 +339,15 @@ lp_setup_fence( struct setup_context *setup ) void lp_setup_set_triangle_state( struct setup_context *setup, unsigned cull_mode, - boolean ccw_is_frontface) + boolean ccw_is_frontface, + boolean scissor ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; setup->triangle = first_triangle; + setup->scissor_test = scissor; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 407f7527770..5081da29d11 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -88,7 +88,8 @@ lp_setup_bind_framebuffer( struct setup_context *setup, void lp_setup_set_triangle_state( struct setup_context *setup, unsigned cullmode, - boolean front_is_ccw ); + boolean front_is_ccw, + boolean scissor ); void lp_setup_set_fs_inputs( struct setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index fc0aef1376c..a5fc34e54a2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -87,6 +87,7 @@ struct setup_context boolean flatshade_first; boolean ccw_is_frontface; + boolean scissor_test; unsigned cullmode; struct pipe_framebuffer_state fb; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index ae354b3870e..018d254c765 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -293,6 +293,13 @@ do_triangle_ccw(struct setup_context 
*setup, miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; + if (setup->scissor_test) { + minx = MAX2(minx, setup->scissor.current.minx); + maxx = MIN2(maxx, setup->scissor.current.maxx); + miny = MAX2(miny, setup->scissor.current.miny); + maxy = MIN2(maxy, setup->scissor.current.maxy); + } + if (miny == maxy || minx == maxx) { lp_scene_putback_data( scene, sizeof *tri ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 7d4c310aae8..feb012816c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -61,7 +61,8 @@ void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, if (llvmpipe->rasterizer) { lp_setup_set_triangle_state( llvmpipe->setup, llvmpipe->rasterizer->cull_mode, - llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW ); + llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW, + llvmpipe->rasterizer->scissor); } llvmpipe->dirty |= LP_NEW_RASTERIZER; -- cgit v1.2.3 From 591401ff05f878ff1607a1a34db1319103025d8f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 16 Jan 2010 21:12:10 +0000 Subject: llvmpipe: use new u_ringbuffer for scene queue --- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- src/gallium/drivers/llvmpipe/lp_scene_queue.c | 114 ++++++++------------------ src/gallium/drivers/llvmpipe/lp_scene_queue.h | 8 +- src/gallium/drivers/llvmpipe/lp_setup.c | 11 ++- 4 files changed, 46 insertions(+), 89 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d03ba1752d6..2e2ebee45de 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -844,7 +844,7 @@ thread_func( void *init_data ) const struct pipe_framebuffer_state *fb; boolean write_depth; - rast->curr_scene = lp_scene_dequeue( rast->full_scenes ); + 
rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE ); lp_scene_bin_iter_begin( rast->curr_scene ); diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.c b/src/gallium/drivers/llvmpipe/lp_scene_queue.c index 8d65a6a6fa2..43d74e4d89d 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene_queue.c +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.c @@ -32,8 +32,7 @@ * which are produced by the "rast" code when it finishes rendering a scene. */ - -#include "pipe/p_thread.h" +#include "util/u_ringbuffer.h" #include "util/u_memory.h" #include "lp_scene_queue.h" @@ -41,20 +40,17 @@ #define MAX_SCENE_QUEUE 4 +struct scene_packet { + struct util_packet header; + struct lp_scene *scene; +}; /** * A queue of scenes */ struct lp_scene_queue { - /** XXX might use a linked list here somedone, but the list will - * probably always be pretty short. - */ - struct lp_scene *scenes[MAX_SCENE_QUEUE]; - unsigned count; - - pipe_condvar count_change; - pipe_mutex mutex; + struct util_ringbuffer *ring; }; @@ -64,11 +60,19 @@ struct lp_scene_queue * lp_scene_queue_create(void) { struct lp_scene_queue *queue = CALLOC_STRUCT(lp_scene_queue); - if (queue) { - pipe_condvar_init(queue->count_change); - pipe_mutex_init(queue->mutex); - } + if (queue == NULL) + return NULL; + + queue->ring = util_ringbuffer_create( MAX_SCENE_QUEUE * + sizeof( struct scene_packet ) / 4); + if (queue->ring == NULL) + goto fail; + return queue; + +fail: + FREE(queue); + return NULL; } @@ -76,41 +80,26 @@ lp_scene_queue_create(void) void lp_scene_queue_destroy(struct lp_scene_queue *queue) { - pipe_condvar_destroy(queue->count_change); - pipe_mutex_destroy(queue->mutex); + util_ringbuffer_destroy(queue->ring); + FREE(queue); } /** Remove first lp_scene from head of queue */ struct lp_scene * -lp_scene_dequeue(struct lp_scene_queue *queue) +lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait) { - struct lp_scene *scene; - unsigned i; - - pipe_mutex_lock(queue->mutex); - while (queue->count == 0) { - 
pipe_condvar_wait(queue->count_change, queue->mutex); - } - - assert(queue->count >= 1); - - /* get head */ - scene = queue->scenes[0]; - - /* shift entries */ - for (i = 0; i < queue->count - 1; i++) { - queue->scenes[i] = queue->scenes[i + 1]; - } + struct scene_packet packet; + enum pipe_error ret; - queue->count--; + ret = util_ringbuffer_dequeue(queue->ring, + &packet.header, + sizeof packet / 4, + wait ); + if (ret != PIPE_OK) + return NULL; - /* signal size change */ - pipe_condvar_signal(queue->count_change); - - pipe_mutex_unlock(queue->mutex); - - return scene; + return packet.scene; } @@ -118,47 +107,16 @@ lp_scene_dequeue(struct lp_scene_queue *queue) void lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene) { - pipe_mutex_lock(queue->mutex); - - assert(queue->count < MAX_SCENE_QUEUE); + struct scene_packet packet; - /* debug: check that scene is not already in the queue */ - if (0) { - unsigned i; - for (i = 0; i < queue->count; i++) { - assert(queue->scenes[i] != scene); - } - } + packet.header.dwords = sizeof packet / 4; + packet.header.data24 = 0; + packet.scene = scene; - /* add to end */ - queue->scenes[queue->count++] = scene; - - /* signal size change */ - pipe_condvar_signal(queue->count_change); - - pipe_mutex_unlock(queue->mutex); + util_ringbuffer_enqueue(queue->ring, &packet.header); } -/** Return number of entries in the queue */ -unsigned -lp_scene_queue_count(struct lp_scene_queue *queue) -{ - unsigned count; - pipe_mutex_lock(queue->mutex); - count = queue->count; - pipe_mutex_unlock(queue->mutex); - return count; -} -/** Wait until the queue has exactly 'count' entries */ -void -lp_scene_queue_wait_count(struct lp_scene_queue *queue, unsigned count) -{ - pipe_mutex_lock(queue->mutex); - while (queue->count != count) { - pipe_condvar_wait(queue->count_change, queue->mutex); - } - pipe_mutex_unlock(queue->mutex); -} + diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h 
b/src/gallium/drivers/llvmpipe/lp_scene_queue.h index 1bd475fa504..fd7c65a2c8b 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene_queue.h +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h @@ -40,16 +40,12 @@ void lp_scene_queue_destroy(struct lp_scene_queue *queue); struct lp_scene * -lp_scene_dequeue(struct lp_scene_queue *queue); +lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait); void -lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *bins); +lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene); -unsigned -lp_scene_queue_count(struct lp_scene_queue *queue); -void -lp_scene_queue_wait_count(struct lp_scene_queue *queue, unsigned size); #endif /* LP_BIN_QUEUE */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index f52dce65d74..d4a4724ad1b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -57,8 +57,11 @@ struct lp_scene * lp_setup_get_current_scene(struct setup_context *setup) { if (!setup->scene) { - /* wait for a free/empty bin */ - setup->scene = lp_scene_dequeue(setup->empty_scenes); + + /* wait for a free/empty scene + */ + setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE); + if(0)lp_scene_reset( setup->scene ); /* XXX temporary? 
*/ lp_scene_set_framebuffer_size(setup->scene, @@ -651,8 +654,8 @@ lp_setup_destroy( struct setup_context *setup ) pipe_buffer_reference(&setup->constants.current, NULL); /* free the scenes in the 'empty' queue */ - while (lp_scene_queue_count(setup->empty_scenes) > 0) { - struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes); + while (1) { + struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes, FALSE); if (!scene) break; lp_scene_destroy(scene); -- cgit v1.2.3 From 62623c4dc5d8b646942bc65e8de350e812945ad1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 18 Jan 2010 13:10:10 -0700 Subject: llvmpipe: added show_tiles and show_subtiles debug options These options draw lines over the tiles to show the 64x64 tile bounds and 16x16 sub-tile bounds. For debugging/visualization. --- src/gallium/drivers/llvmpipe/lp_debug.h | 3 ++ src/gallium/drivers/llvmpipe/lp_rast.c | 60 ++++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_screen.c | 2 ++ 3 files changed, 65 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 74b27574942..7128e8eb4b7 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -45,6 +45,9 @@ st_print_current(void); #define DEBUG_QUERY 0x40 #define DEBUG_SCREEN 0x80 #define DEBUG_JIT 0x100 +#define DEBUG_SHOW_TILES 0x200 +#define DEBUG_SHOW_SUBTILES 0x400 + #ifdef DEBUG extern int LP_DEBUG; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 2e2ebee45de..440bb322358 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -461,6 +461,61 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, } +#ifdef DEBUG +/** + * Set top row and left column of the tile's pixels to white. For debugging. 
+ */ +static void +outline_tile(uint8_t *tile) +{ + const uint8_t val = 0xff; + unsigned i; + + for (i = 0; i < TILE_SIZE; i++) { + TILE_PIXEL(tile, i, 0, 0) = val; + TILE_PIXEL(tile, i, 0, 1) = val; + TILE_PIXEL(tile, i, 0, 2) = val; + TILE_PIXEL(tile, i, 0, 3) = val; + + TILE_PIXEL(tile, 0, i, 0) = val; + TILE_PIXEL(tile, 0, i, 1) = val; + TILE_PIXEL(tile, 0, i, 2) = val; + TILE_PIXEL(tile, 0, i, 3) = val; + } +} +#endif /* DEBUG */ + + +#ifdef DEBUG +/** + * Draw grid of gray lines at 16-pixel intervals across the tile to + * show the sub-tile boundaries. For debugging. + */ +static void +outline_subtiles(uint8_t *tile) +{ + const uint8_t val = 0x80; + const unsigned step = 16; + unsigned i, j; + + for (i = 0; i < TILE_SIZE; i += 16) { + for (j = 0; j < TILE_SIZE; j++) { + TILE_PIXEL(tile, i, j, 0) = val; + TILE_PIXEL(tile, i, j, 1) = val; + TILE_PIXEL(tile, i, j, 2) = val; + TILE_PIXEL(tile, i, j, 3) = val; + + TILE_PIXEL(tile, j, i, 0) = val; + TILE_PIXEL(tile, j, i, 1) = val; + TILE_PIXEL(tile, j, i, 2) = val; + TILE_PIXEL(tile, j, i, 3) = val; + } + } + + outline_tile(tile); +} +#endif /* DEBUG */ + /** @@ -500,6 +555,11 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, thread_index, x, y, w, h); + if (LP_DEBUG & DEBUG_SHOW_SUBTILES) + outline_subtiles(rast->tasks[thread_index].tile.color[i]); + else if (LP_DEBUG & DEBUG_SHOW_TILES) + outline_tile(rast->tasks[thread_index].tile.color[i]); + lp_tile_write_4ub(transfer->texture->format, rast->tasks[thread_index].tile.color[i], rast->cbuf_map[i], diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index a28f6935b68..72f2e8ebf80 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -52,6 +52,8 @@ static const struct debug_named_value lp_debug_flags[] = { { "query", DEBUG_QUERY }, { "screen", DEBUG_SCREEN }, { "jit", DEBUG_JIT }, + { "show_tiles", 
DEBUG_SHOW_TILES }, + { "show_subtiles", DEBUG_SHOW_SUBTILES }, {NULL, 0} }; #endif -- cgit v1.2.3 From 89bb07730b1c0f292d1d70a99466e8a885fb87bf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 18 Jan 2010 14:35:43 -0700 Subject: util: fix broken util_ringbuffer_dequeue() The tests for an empty ring buffer were incorrect. Fixes glxinfo segfaults. Plus, add a new assertion. --- src/gallium/auxiliary/util/u_ringbuffer.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c b/src/gallium/auxiliary/util/u_ringbuffer.c index 3f43a19e018..e73ba0b3481 100644 --- a/src/gallium/auxiliary/util/u_ringbuffer.c +++ b/src/gallium/auxiliary/util/u_ringbuffer.c @@ -53,11 +53,22 @@ void util_ringbuffer_destroy( struct util_ringbuffer *ring ) FREE(ring); } +/** + * Return number of free entries in the ring + */ static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring ) { return (ring->tail - (ring->head + 1)) & ring->mask; } +/** + * Is the ring buffer empty? 
+ */ +static INLINE boolean util_ringbuffer_empty( const struct util_ringbuffer *ring ) +{ + return util_ringbuffer_space(ring) == ring->mask; +} + void util_ringbuffer_enqueue( struct util_ringbuffer *ring, const struct util_packet *packet ) { @@ -67,6 +78,10 @@ void util_ringbuffer_enqueue( struct util_ringbuffer *ring, */ pipe_mutex_lock(ring->mutex); + /* make sure we don't request an impossible amount of space + */ + assert(packet->dwords <= ring->mask); + /* Wait for free space: */ while (util_ringbuffer_space(ring) < packet->dwords) @@ -104,14 +119,14 @@ enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, */ pipe_mutex_lock(ring->mutex); - /* Wait for free space: + /* Get next ring entry: */ if (wait) { - while (util_ringbuffer_space(ring) == 0) + while (util_ringbuffer_empty(ring)) pipe_condvar_wait(ring->change, ring->mutex); } else { - if (util_ringbuffer_space(ring) == 0) { + if (util_ringbuffer_empty(ring)) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } -- cgit v1.2.3 From 9a23d810be02edf740ce58196435cd6cdfd903c9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 18 Jan 2010 17:39:54 -0700 Subject: llvmpipe: tweak a comment --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 4b51d6b964f..ab545ed3de6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -666,7 +666,7 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ arg_types[8] = LLVMInt32Type(); /* c0 */ - arg_types[9] = LLVMInt32Type(); /* c1 */ + arg_types[9] = LLVMInt32Type(); /* c1 */ arg_types[10] = LLVMInt32Type(); /* c2 */ /* Note: the step arrays are built as int32[16] but we interpret * them here as 
int32_vec4[4]. -- cgit v1.2.3 From 75f262b8b441e05f5b8811db1c205220200d64ad Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 09:30:13 -0700 Subject: llvmpipe: updated comments --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 018d254c765..0d89bef606d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -440,8 +440,10 @@ do_triangle_ccw(struct setup_context *setup, int x, y; - /* Trivially accept or reject blocks, else jump to per-pixel - * examination above. + /* Test tile-sized blocks against the triangle. + * Discard blocks fully outside the tri. If the block is fully + * contained inside the tri, bin an lp_rast_shade_tile command. + * Else, bin a lp_rast_triangle command. */ for (y = miny; y <= maxy; y++) { -- cgit v1.2.3 From 0fccfc9cc0cb7699598f1739d8cd3811175cdf13 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 09:30:44 -0700 Subject: llvmpipe: remove unneeded DEBUG checks, use step var --- src/gallium/drivers/llvmpipe/lp_rast.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 440bb322358..5fe939d234b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -461,7 +461,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, } -#ifdef DEBUG /** * Set top row and left column of the tile's pixels to white. For debugging. */ @@ -483,10 +482,8 @@ outline_tile(uint8_t *tile) TILE_PIXEL(tile, 0, i, 3) = val; } } -#endif /* DEBUG */ -#ifdef DEBUG /** * Draw grid of gray lines at 16-pixel intervals across the tile to * show the sub-tile boundaries. For debugging. 
@@ -498,7 +495,7 @@ outline_subtiles(uint8_t *tile) const unsigned step = 16; unsigned i, j; - for (i = 0; i < TILE_SIZE; i += 16) { + for (i = 0; i < TILE_SIZE; i += step) { for (j = 0; j < TILE_SIZE; j++) { TILE_PIXEL(tile, i, j, 0) = val; TILE_PIXEL(tile, i, j, 1) = val; @@ -514,7 +511,6 @@ outline_subtiles(uint8_t *tile) outline_tile(tile); } -#endif /* DEBUG */ -- cgit v1.2.3 From 1073e39ab92a795f7b3958dd789ab324c82c00ae Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 09:45:51 -0700 Subject: llvmpipe: re-order file list, fix indentation --- src/gallium/drivers/llvmpipe/Makefile | 8 ++++---- src/gallium/drivers/llvmpipe/SConscript | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 3d8d88179a9..71e7c2b5d98 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -6,8 +6,6 @@ LIBNAME = llvmpipe DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ - lp_scene.c \ - lp_scene_queue.c \ lp_bld_alpha.c \ lp_bld_arit.c \ lp_bld_blend_aos.c \ @@ -38,15 +36,17 @@ C_SOURCES = \ lp_fence.c \ lp_flush.c \ lp_jit.c \ + lp_query.c \ lp_rast.c \ lp_rast_tri.c \ + lp_scene.c \ + lp_scene_queue.c \ + lp_screen.c \ lp_setup.c \ lp_setup_line.c \ lp_setup_point.c \ lp_setup_tri.c \ lp_setup_vbuf.c \ - lp_query.c \ - lp_screen.c \ lp_state_blend.c \ lp_state_clip.c \ lp_state_derived.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5af77c4a12d..c4e7a4a22f9 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -32,14 +32,14 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', - 'lp_bld_format_query.c', + 'lp_bld_format_query.c', 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', 'lp_bld_logic.c', 'lp_bld_misc.cpp', - 
'lp_bld_pack.c', - 'lp_bld_sample.c', + 'lp_bld_pack.c', + 'lp_bld_sample.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_swizzle.c', @@ -53,6 +53,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_flush.c', 'lp_jit.c', 'lp_query.c', + 'lp_rast.c', + 'lp_rast_tri.c', 'lp_scene.c', 'lp_scene_queue.c', 'lp_screen.c', @@ -71,8 +73,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', - 'lp_rast.c', - 'lp_rast_tri.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_soa.c', -- cgit v1.2.3 From d8d80a8d74416bffd274d3b0597706374a0c1cc8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 11:58:43 -0700 Subject: llvmpipe: fix-up comment --- src/gallium/drivers/llvmpipe/lp_texture.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 14f636e4aeb..1c92d7f7225 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -65,7 +65,8 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, unsigned nblocksx, nblocksy; /* Allocate storage for whole quads. This is particularly important - * for depth surfaces, which are currently stored in a swizzled format. */ + * for depth surfaces, which are currently stored in a swizzled format. + */ nblocksx = util_format_get_nblocksx(pt->format, align(width, 2)); nblocksy = util_format_get_nblocksy(pt->format, align(height, 2)); -- cgit v1.2.3 From ec459f2aeca39e51f495cde455ba18d0a9489caa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jan 2010 16:58:25 -0700 Subject: llvmpipe: asst. 
task-related clean-ups --- src/gallium/drivers/llvmpipe/lp_rast.c | 51 +++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 5fe939d234b..05901d07aa5 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -275,7 +275,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast, assert(h <= TILE_SIZE); lp_tile_read_4ub(transfer->texture->format, - rast->tasks[thread_index].tile.color[i], + task->tile.color[i], rast->cbuf_map[i], transfer->stride, x, y, @@ -309,8 +309,9 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { - const unsigned x = rast->tasks[thread_index].x; - const unsigned y = rast->tasks[thread_index].y; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -323,7 +324,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); - lp_tile_read_z32(rast->tasks[thread_index].tile.depth, + lp_tile_read_z32(task->tile.depth, rast->zsbuf_map, rast->zsbuf_transfer->stride, x, y, w, h); @@ -353,11 +354,12 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, unsigned thread_index, const union lp_rast_cmd_arg arg ) { - const struct lp_rast_state *state = rast->tasks[thread_index].current_state; - struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const struct lp_rast_state *state = task->current_state; + struct lp_rast_tile *tile = &task->tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const unsigned tile_x = rast->tasks[thread_index].x; - const unsigned 
tile_y = rast->tasks[thread_index].y; + const unsigned tile_x = task->x; + const unsigned tile_y = task->y; unsigned x, y; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -404,8 +406,9 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3) { - const struct lp_rast_state *state = rast->tasks[thread_index].current_state; - struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const struct lp_rast_state *state = task->current_state; + struct lp_rast_tile *tile = &task->tile; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; unsigned i; @@ -520,8 +523,9 @@ outline_subtiles(uint8_t *tile) static void lp_rast_store_color( struct lp_rasterizer *rast, unsigned thread_index) { - const unsigned x = rast->tasks[thread_index].x; - const unsigned y = rast->tasks[thread_index].y; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; unsigned i; for (i = 0; i < rast->state.fb.nr_cbufs; i++) { @@ -552,12 +556,12 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, thread_index, x, y, w, h); if (LP_DEBUG & DEBUG_SHOW_SUBTILES) - outline_subtiles(rast->tasks[thread_index].tile.color[i]); + outline_subtiles(task->tile.color[i]); else if (LP_DEBUG & DEBUG_SHOW_TILES) - outline_tile(rast->tasks[thread_index].tile.color[i]); + outline_tile(task->tile.color[i]); lp_tile_write_4ub(transfer->texture->format, - rast->tasks[thread_index].tile.color[i], + task->tile.color[i], rast->cbuf_map[i], transfer->stride, x, y, @@ -587,8 +591,9 @@ lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, static void lp_rast_store_zstencil( struct lp_rasterizer *rast, unsigned thread_index ) { - const unsigned x = rast->tasks[thread_index].x; - const unsigned y = rast->tasks[thread_index].y; + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = 
task->x; + const unsigned y = task->y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -601,7 +606,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); - lp_tile_write_z32(rast->tasks[thread_index].tile.depth, + lp_tile_write_z32(task->tile.depth, rast->zsbuf_map, rast->zsbuf_transfer->stride, x, y, w, h); @@ -991,12 +996,14 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { + struct lp_rasterizer_task *task = &rast->tasks[i]; + for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) - rast->tasks[i].tile.color[cbuf] = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + task->tile.color[cbuf] = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); - rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); - rast->tasks[i].rast = rast; - rast->tasks[i].thread_index = i; + task->tile.depth = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); + task->rast = rast; + task->thread_index = i; } create_rast_threads(rast); -- cgit v1.2.3 From 4d2dc9da82fcb0464b88c273a606f16d0183a758 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 11:32:48 -0700 Subject: llvmpipe: updated comment --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ab545ed3de6..0053c1b88b7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -31,6 +31,8 @@ * Code generate the whole fragment pipeline. 
* * The fragment pipeline consists of the following stages: + * - triangle edge in/out testing + * - scissor test * - stipple (TBI) * - early depth test * - fragment shader -- cgit v1.2.3 From 9b534400d9969eceac46b28145405086dda8c113 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:19:00 -0700 Subject: llvmpipe: add makefile rule for generating .s files --- src/gallium/drivers/llvmpipe/Makefile | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 71e7c2b5d98..666aa7293ef 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -68,3 +68,8 @@ include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ + + +# to make a .s file to inspect assembly code +.c.s: + $(CC) -S $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -- cgit v1.2.3 From 58903b378188861a6f7a67bbfb07424b73df2a1b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:23:11 -0700 Subject: llvmpipe: put TILE_SIZE #define in its own header To avoid including lp_tile_soa.h in other places. --- src/gallium/drivers/llvmpipe/lp_tile_size.h | 39 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_tile_soa.h | 15 +++-------- 2 files changed, 43 insertions(+), 11 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_tile_size.h (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_size.h b/src/gallium/drivers/llvmpipe/lp_tile_size.h new file mode 100644 index 00000000000..f0b983c0632 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_size.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_TILE_SIZE_H +#define LP_TILE_SIZE_H + + +/** + * Tile size (width and height). This needs to be a power of two. + */ +#define TILE_ORDER 6 +#define TILE_SIZE (1 << TILE_ORDER) + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 1b7be3cce0d..eea3ab84990 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -30,7 +30,7 @@ #include "pipe/p_compiler.h" #include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ - +#include "lp_tile_size.h" #ifdef __cplusplus extern "C" { @@ -40,22 +40,15 @@ extern "C" { struct pipe_transfer; -/** - * Cache tile size (width and height). This needs to be a power of two. 
- */ -#define TILE_ORDER 6 -#define TILE_SIZE (1 << TILE_ORDER) - - #define TILE_VECTOR_HEIGHT 4 #define TILE_VECTOR_WIDTH 4 extern const unsigned char tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; -#define TILE_C_STRIDE (TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH) -#define TILE_X_STRIDE (NUM_CHANNELS * TILE_C_STRIDE) -#define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) +#define TILE_C_STRIDE (TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH) //16 +#define TILE_X_STRIDE (NUM_CHANNELS * TILE_C_STRIDE) //64 +#define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) //1024 #define TILE_PIXEL(_p, _x, _y, _c) \ ((_p)[((_y) / TILE_VECTOR_HEIGHT) * TILE_Y_STRIDE + \ -- cgit v1.2.3 From 0706dae088e5b46c4cad1a5ee41038e05c7f363b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:44:12 -0700 Subject: llvmpipe: align display target size to multiple of tile size This will allow us to skip clipping tiles to surface bounds. --- src/gallium/drivers/llvmpipe/lp_texture.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 1c92d7f7225..36e2ebb41a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -40,9 +40,10 @@ #include "util/u_memory.h" #include "lp_context.h" +#include "lp_screen.h" #include "lp_state.h" #include "lp_texture.h" -#include "lp_screen.h" +#include "lp_tile_size.h" #include "lp_winsys.h" @@ -67,8 +68,8 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, /* Allocate storage for whole quads. This is particularly important * for depth surfaces, which are currently stored in a swizzled format. 
*/ - nblocksx = util_format_get_nblocksx(pt->format, align(width, 2)); - nblocksy = util_format_get_nblocksy(pt->format, align(height, 2)); + nblocksx = util_format_get_nblocksx(pt->format, align(width, TILE_SIZE)); + nblocksy = util_format_get_nblocksy(pt->format, align(height, TILE_SIZE)); lpt->stride[level] = align(nblocksx * util_format_get_blocksize(pt->format), 16); @@ -96,10 +97,15 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, { struct llvmpipe_winsys *winsys = screen->winsys; + /* Round up the surface size to a multiple of the tile size to + * avoid tile clipping. + */ + unsigned width = align(lpt->base.width0, TILE_SIZE); + unsigned height = align(lpt->base.height0, TILE_SIZE); + lpt->dt = winsys->displaytarget_create(winsys, lpt->base.format, - lpt->base.width0, - lpt->base.height0, + width, height, 16, &lpt->stride[0] ); @@ -299,8 +305,8 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, pipe_texture_reference(&pt->texture, texture); pt->x = x; pt->y = y; - pt->width = w; - pt->height = h; + pt->width = align(w, TILE_SIZE); + pt->height = align(h, TILE_SIZE); pt->stride = lptex->stride[level]; pt->usage = usage; pt->face = face; -- cgit v1.2.3 From 7319ae0954980196822a09d914e8b7d9cad07d16 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:47:22 -0700 Subject: llvmpipe: remove tile clipping code The surface is always a multiple of the tile size now. 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 05901d07aa5..e27b6528eaf 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -261,13 +261,6 @@ void lp_rast_load_color( struct lp_rasterizer *rast, if (y >= transfer->height) continue; - /* XXX: require tile-size aligned render target dimensions: - */ - if (x + w > transfer->width) - w -= x + w - transfer->width; - - if (y + h > transfer->height) - h -= y + h - transfer->height; assert(w >= 0); assert(h >= 0); @@ -539,19 +532,6 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, if (y >= transfer->height) continue; - /* XXX: require tile-size aligned render target dimensions: - */ - if (x + w > transfer->width) - w -= x + w - transfer->width; - - if (y + h > transfer->height) - h -= y + h - transfer->height; - - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, thread_index, x, y, w, h); -- cgit v1.2.3 From 63f249bf909cab60635c2df9122db86eaab6c421 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Jan 2010 17:48:09 -0700 Subject: llvmpipe: optimize tile writing code The code which converts/copies color tiles to the linear layout has been rewritten. There's less arithmetic and better loop unrolling, and possibly a better memory access pattern. Some demos, like gears, are about 20% faster now. 
--- src/gallium/drivers/llvmpipe/lp_tile_soa.py | 127 ++++++++++++++++++++++++---- 1 file changed, 109 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index a603b7f9f42..5d53689a3db 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -129,22 +129,8 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): print -def generate_format_write(format, src_type, src_native_type, src_suffix): - '''Generate the function to write pixels to a particular format''' - - name = short_name(format) - - dst_native_type = native_type(format) - - print 'static void' - print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) - print '{' - print ' unsigned x, y;' - print ' uint8_t *dst_row = dst + y0*dst_stride;' - print ' for (y = 0; y < h; ++y) {' - print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) - print ' for (x = 0; x < w; ++x) {' - +def compute_inverse_swizzle(format): + '''Return an array[4] of inverse swizzle terms''' inv_swizzle = [None]*4 if format.colorspace == 'rgb': for i in range(4): @@ -155,8 +141,86 @@ def generate_format_write(format, src_type, src_native_type, src_suffix): swizzle = format.out_swizzle[0] if swizzle < 4: inv_swizzle[swizzle] = 0 - else: - assert False + return inv_swizzle + + +def pack_rgba(format, src_type, r, g, b, a): + """Return an expression for packing r, g, b, a into a pixel of the + given format. 
Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)' + """ + assert format.colorspace == 'rgb' + inv_swizzle = compute_inverse_swizzle(format) + shift = 0 + expr = None + for i in range(4): + # choose r, g, b, or a depending on the inverse swizzle term + if inv_swizzle[i] == 0: + value = r + elif inv_swizzle[i] == 1: + value = g + elif inv_swizzle[i] == 2: + value = b + elif inv_swizzle[i] == 3: + value = a + else: + value = None + + if value: + dst_type = format.in_types[i] + dst_native_type = native_type(format) + value = conversion_expr(src_type, dst_type, dst_native_type, value) + term = "((%s) << %d)" % (value, shift) + if expr: + expr = expr + " | " + term + else: + expr = term + + width = format.in_types[i].size + shift = shift + width + return expr + + +def emit_unrolled_write_code(format, src_type): + '''Emit code for writing a block based on unrolled loops. + This is considerably faster than the TILE_PIXEL-based code below. + ''' + dst_native_type = native_type(format) + print ' const unsigned dstpix_stride = dst_stride / %d;' % format.stride() + print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type) + print ' unsigned int qx, qy, i;' + print + print ' for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {' + print ' const unsigned py = y0 + qy;' + print ' for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {' + print ' const unsigned px = x0 + qx;' + print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;' + print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;' + print ' const uint8_t *b = src + 2 * TILE_C_STRIDE;' + print ' const uint8_t *a = src + 3 * TILE_C_STRIDE;' + print ' (void) r; (void) g; (void) b; (void) a; /* silence warnings */' + print ' for (i = 0; i < TILE_C_STRIDE; i += 2) {' + print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_type, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") + print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_type, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") + print ' const unsigned offset = (py + 
tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);' + print ' dstpix[offset + 0] = pixel0;' + print ' dstpix[offset + 1] = pixel1;' + print ' }' + print ' src += TILE_X_STRIDE;' + print ' }' + print ' }' + + +def emit_tile_pixel_write_code(format, src_type): + '''Emit code for writing a block based on the TILE_PIXEL macro.''' + dst_native_type = native_type(format) + + inv_swizzle = compute_inverse_swizzle(format) + + print ' unsigned x, y;' + print ' uint8_t *dst_row = dst + y0*dst_stride;' + print ' for (y = 0; y < h; ++y) {' + print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) + print ' for (x = 0; x < w; ++x) {' if format.layout == ARITH: print ' %s pixel = 0;' % dst_native_type @@ -185,6 +249,20 @@ def generate_format_write(format, src_type, src_native_type, src_suffix): print ' }' print ' dst_row += dst_stride;' print ' }' + + +def generate_format_write(format, src_type, src_native_type, src_suffix): + '''Generate the function to write pixels to a particular format''' + + name = short_name(format) + + print 'static void' + print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) + print '{' + if format.layout == ARITH and format.colorspace == 'rgb': + emit_unrolled_write_code(format, src_type) + else: + emit_tile_pixel_write_code(format, src_type) print '}' print @@ -265,6 +343,19 @@ def main(): print ' { 10, 11, 14, 15}' print '};' print + print '/* Note: these lookup tables could be replaced with some' + print ' * bit-twiddling code, but this is a little faster.' 
+ print ' */' + print 'static unsigned tile_x_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {' + print ' 0, 1, 0, 1, 2, 3, 2, 3,' + print ' 0, 1, 0, 1, 2, 3, 2, 3' + print '};' + print + print 'static unsigned tile_y_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {' + print ' 0, 0, 1, 1, 0, 0, 1, 1,' + print ' 2, 2, 3, 3, 2, 2, 3, 3' + print '};' + print generate_clamp() -- cgit v1.2.3 From cd9d9e2436a0815f6ed3a61d2cdf8fad53278506 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 14:59:01 -0700 Subject: llvmpipe: added simple perf/statistics counting facility Currently counting number of tris, how many tiles of each size are fully covered, partially covered or empty, etc. Set LP_DEBUG=counters to enable. Results are printed upon context destruction. --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_context.c | 5 ++ src/gallium/drivers/llvmpipe/lp_debug.h | 1 + src/gallium/drivers/llvmpipe/lp_perf.c | 86 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_perf.h | 74 +++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 6 ++ src/gallium/drivers/llvmpipe/lp_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- src/gallium/drivers/llvmpipe/lp_setup.h | 2 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 16 ++++-- 11 files changed, 189 insertions(+), 6 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_perf.c create mode 100644 src/gallium/drivers/llvmpipe/lp_perf.h (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 666aa7293ef..899af6acf84 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -36,6 +36,7 @@ C_SOURCES = \ lp_fence.c \ lp_flush.c \ lp_jit.c \ + lp_perf.c \ lp_query.c \ lp_rast.c \ lp_rast_tri.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 
c4e7a4a22f9..d7a396292c2 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -52,6 +52,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_fence.c', 'lp_flush.c', 'lp_jit.c', + 'lp_perf.c', 'lp_query.c', 'lp_rast.c', 'lp_rast_tri.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index c5b00f8e23f..51de6f93ca7 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -38,6 +38,7 @@ #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" +#include "lp_perf.h" #include "lp_state.h" #include "lp_surface.h" #include "lp_texture.h" @@ -54,6 +55,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); uint i; + lp_print_counters(); + /* This will also destroy llvmpipe->setup: */ if (llvmpipe->draw) @@ -195,6 +198,8 @@ llvmpipe_create( struct pipe_screen *screen ) lp_init_surface_functions(llvmpipe); + lp_reset_counters(); + return &llvmpipe->pipe; fail: diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 7128e8eb4b7..7e04bd471ef 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -47,6 +47,7 @@ st_print_current(void); #define DEBUG_JIT 0x100 #define DEBUG_SHOW_TILES 0x200 #define DEBUG_SHOW_SUBTILES 0x400 +#define DEBUG_COUNTERS 0x800 #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c new file mode 100644 index 00000000000..2628d51069b --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_perf.c @@ -0,0 +1,86 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_debug.h" +#include "lp_debug.h" +#include "lp_perf.h" + + + +struct lp_counters lp_count; + + +void +lp_reset_counters(void) +{ + memset(&lp_count, 0, sizeof(lp_count)); +} + + +void +lp_print_counters(void) +{ + if (LP_DEBUG & DEBUG_COUNTERS) { + unsigned total_64, total_16, total_4; + float p1, p2, p3; + + debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); + debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); + + total_64 = (lp_count.nr_empty_64 + + lp_count.nr_fully_covered_64 + + lp_count.nr_partially_covered_64); + + p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64; + p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64; + p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64; + + debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); + debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64); + debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); + + total_16 = (lp_count.nr_empty_16 + + lp_count.nr_fully_covered_16 + + lp_count.nr_partially_covered_16); + + p1 = 100.0 * (float) lp_count.nr_empty_16 / (float) total_16; + p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16; + p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16; + + debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); + debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16); + debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); + + total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4); + + p1 = 100.0 * (float) 
lp_count.nr_empty_4 / (float) total_4; + p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4; + + debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); + debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h new file mode 100644 index 00000000000..9886088c38e --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -0,0 +1,74 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Performance / statistic counters, etc. 
+ */ + + +#ifndef LP_PERF_H +#define LP_PERF_H + + +/** + * Various counters + */ +struct lp_counters +{ + unsigned nr_tris; + unsigned nr_culled_tris; + unsigned nr_empty_64; + unsigned nr_fully_covered_64; + unsigned nr_partially_covered_64; + unsigned nr_empty_16; + unsigned nr_fully_covered_16; + unsigned nr_partially_covered_16; + unsigned nr_empty_4; + unsigned nr_non_empty_4; +}; + + +extern struct lp_counters lp_count; + + +/** Increment the named counter (only for debug builds) */ +#ifdef DEBUG +#define LP_COUNT(counter) lp_count.counter++ +#else +#define LP_COUNT(counter) +#endif + + +extern void +lp_reset_counters(void); + + +extern void +lp_print_counters(void); + + +#endif /* LP_PERF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index b3d1e7dee45..e9d15727a7b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -32,6 +32,7 @@ #include <limits.h> #include "util/u_math.h" #include "lp_debug.h" +#include "lp_perf.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" @@ -167,6 +168,7 @@ do_block_16( struct lp_rasterizer_task *rast_task, cx2 + eo2 < 0 || cx3 + eo3 < 0) { /* the block is completely outside the triangle - nop */ + LP_COUNT(nr_empty_4); } else { int px = x + pos_table4[i][0]; @@ -174,6 +176,7 @@ do_block_16( struct lp_rasterizer_task *rast_task, /* Don't bother testing if the 4x4 block is entirely in/out of * the triangle. It's a little faster to do it in the jit code.
*/ + LP_COUNT(nr_non_empty_4); do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); } } @@ -223,6 +226,7 @@ lp_rast_triangle( struct lp_rasterizer *rast, cx2 + eo2 < 0 || cx3 + eo3 < 0) { /* the block is completely outside the triangle - nop */ + LP_COUNT(nr_empty_16); } else { int px = x + pos_table16[i][0]; @@ -232,10 +236,12 @@ lp_rast_triangle( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { /* the block is completely inside the triangle */ + LP_COUNT(nr_fully_covered_16); block_full_16(rast_task, tri, px, py); } else { /* the block is partially in/out of the triangle */ + LP_COUNT(nr_partially_covered_16); do_block_16(rast_task, tri, px, py, cx1, cx2, cx3); } } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 72f2e8ebf80..9dd4ea7ef67 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -54,6 +54,7 @@ static const struct debug_named_value lp_debug_flags[] = { { "jit", DEBUG_JIT }, { "show_tiles", DEBUG_SHOW_TILES }, { "show_subtiles", DEBUG_SHOW_SUBTILES }, + { "counters", DEBUG_COUNTERS }, {NULL, 0} }; #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d4a4724ad1b..f8fc912fa1b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -499,7 +499,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, * Note: we have to check all scenes including any scenes currently * being rendered and the current scene being built. 
*/ -boolean +unsigned lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 5081da29d11..0e155a7dc31 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -123,7 +123,7 @@ void lp_setup_set_sampler_textures( struct setup_context *setup, unsigned num, struct pipe_texture **texture); -boolean +unsigned lp_setup_is_texture_referenced( const struct setup_context *setup, const struct pipe_texture *texture ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 0d89bef606d..76ecab76447 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -29,10 +29,11 @@ * Binning code for triangles */ -#include "lp_setup_context.h" -#include "lp_rast.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_rast.h" #define NUM_CHANNELS 4 @@ -278,12 +279,15 @@ do_triangle_ccw(struct setup_context *setup, area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12); + LP_COUNT(nr_tris); + /* Cull non-ccw and zero-sized triangles. * * XXX: subject to overflow?? 
*/ if (area <= 0.0f) { lp_scene_putback_data( scene, sizeof *tri ); + LP_COUNT(nr_culled_tris); return; } @@ -303,6 +307,7 @@ do_triangle_ccw(struct setup_context *setup, if (miny == maxy || minx == maxx) { lp_scene_putback_data( scene, sizeof *tri ); + LP_COUNT(nr_culled_tris); return; } @@ -459,6 +464,7 @@ do_triangle_ccw(struct setup_context *setup, cx3 + eo3 < 0) { /* do nothing */ + LP_COUNT(nr_empty_64); if (in) break; /* exiting triangle, all done with this row */ } @@ -466,8 +472,9 @@ do_triangle_ccw(struct setup_context *setup, cx2 + ei2 > 0 && cx3 + ei3 > 0) { - in = TRUE; /* triangle covers the whole tile- shade whole tile */ + LP_COUNT(nr_fully_covered_64); + in = TRUE; if(setup->fs.current.opaque) { lp_scene_bin_reset( scene, x, y ); lp_scene_bin_command( scene, x, y, @@ -480,8 +487,9 @@ do_triangle_ccw(struct setup_context *setup, } else { + /* rasterizer/shade partial tile */ + LP_COUNT(nr_partially_covered_64); in = TRUE; - /* shade partial tile */ lp_scene_bin_command( scene, x, y, lp_rast_triangle, lp_rast_arg_triangle(tri) ); -- cgit v1.2.3 From a904a7b99043c19493db5c0945b046795a5932b1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 16:21:31 -0700 Subject: llvmpipe: manually unroll the inputs.step[] setup code Good for a few more fps in some tests. --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 49 ++++++++++++++++++----------- 1 file changed, 31 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 76ecab76447..dcd849bc856 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -362,31 +362,44 @@ do_triangle_ccw(struct setup_context *setup, tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; + /* Fill in the inputs.step[][] arrays. + * We've manually unrolled some loops here. 
+ */ { const int xstep1 = -tri->dy12; const int xstep2 = -tri->dy23; const int xstep3 = -tri->dy31; - const int ystep1 = tri->dx12; const int ystep2 = tri->dx23; const int ystep3 = tri->dx31; - - int qx, qy, ix, iy; - int i = 0; - - for (qy = 0; qy < 2; qy++) { - for (qx = 0; qx < 2; qx++) { - for (iy = 0; iy < 2; iy++) { - for (ix = 0; ix < 2; ix++, i++) { - int x = qx * 2 + ix; - int y = qy * 2 + iy; - tri->inputs.step[0][i] = x * xstep1 + y * ystep1; - tri->inputs.step[1][i] = x * xstep2 + y * ystep2; - tri->inputs.step[2][i] = x * xstep3 + y * ystep3; - } - } - } - } + +#define SETUP_STEP(i, x, y) \ + do { \ + tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \ + tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \ + tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \ + } while (0) + + SETUP_STEP(0, 0, 0); + SETUP_STEP(1, 1, 0); + SETUP_STEP(2, 0, 1); + SETUP_STEP(3, 1, 1); + + SETUP_STEP(4, 2, 0); + SETUP_STEP(5, 3, 0); + SETUP_STEP(6, 2, 1); + SETUP_STEP(7, 3, 1); + + SETUP_STEP(8, 0, 2); + SETUP_STEP(9, 1, 2); + SETUP_STEP(10, 0, 3); + SETUP_STEP(11, 1, 3); + + SETUP_STEP(12, 2, 2); + SETUP_STEP(13, 3, 2); + SETUP_STEP(14, 2, 3); + SETUP_STEP(15, 3, 3); +#undef STEP } /* -- cgit v1.2.3 From ff9b55da9a6e3b5aa2d42eac7d79c675a679af57 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 17:31:43 -0700 Subject: llvmpipe: area is an int here, not float --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index dcd849bc856..b637c35735b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -265,7 +265,8 @@ do_triangle_ccw(struct setup_context *setup, struct lp_scene *scene = lp_setup_get_current_scene(setup); struct lp_rast_triangle *tri = lp_scene_alloc_aligned( scene, sizeof *tri, 16 ); - float area, oneoverarea; + int 
area; + float oneoverarea; int minx, maxx, miny, maxy; tri->dx12 = x1 - x2; @@ -276,8 +277,7 @@ do_triangle_ccw(struct setup_context *setup, tri->dy23 = y2 - y3; tri->dy31 = y3 - y1; - area = (tri->dx12 * tri->dy31 - - tri->dx31 * tri->dy12); + area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12); LP_COUNT(nr_tris); @@ -285,7 +285,7 @@ do_triangle_ccw(struct setup_context *setup, * * XXX: subject to overflow?? */ - if (area <= 0.0f) { + if (area <= 0) { lp_scene_putback_data( scene, sizeof *tri ); LP_COUNT(nr_culled_tris); return; -- cgit v1.2.3 From 798a9d3f942df1953a538073c85d6a6fed3775db Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 18:25:31 -0700 Subject: llvmpipe: re-use a1 var in linear_coef() --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index b637c35735b..15534756c4a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -89,7 +89,7 @@ static void linear_coef( struct lp_rast_triangle *tri, * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. */ - tri->inputs.a0[slot][i] = (v1[vert_attr][i] - + tri->inputs.a0[slot][i] = (a1 - (dadx * (v1[0][0] - 0.5f) + dady * (v1[0][1] - 0.5f))); } -- cgit v1.2.3 From e24ea786faad502da63cc4d59b0c30e3f1915c45 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 19:04:53 -0700 Subject: llvmpipe: consolidate lp_scene_alloc_aligned() calls Use just one call instead of four. Good for a few more fps. 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 54 ++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 15534756c4a..e5e64c3e5c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -181,18 +181,8 @@ static void setup_tri_coefficients( struct setup_context *setup, const float (*v3)[4], boolean frontface) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); unsigned slot; - /* Allocate space for the a0, dadx and dady arrays - */ - { - unsigned bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = lp_scene_alloc_aligned( scene, bytes, 16 ); - tri->inputs.dadx = lp_scene_alloc_aligned( scene, bytes, 16 ); - tri->inputs.dady = lp_scene_alloc_aligned( scene, bytes, 16 ); - } - /* The internal position input is in slot zero: */ setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); @@ -243,6 +233,41 @@ static inline int subpixel_snap( float a ) } + +/** + * Alloc space for a new triangle plus the input.a0/dadx/dady arrays + * immediately after it. + * The memory is allocated from the per-scene pool, not per-tile. 
+ * \param tri_size returns number of bytes allocated + * \param nr_inputs number of fragment shader inputs + * \return pointer to triangle space + */ +static INLINE struct lp_rast_triangle * +alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) +{ + unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + struct lp_rast_triangle *tri; + unsigned bytes; + char *inputs; + + assert(sizeof(*tri) % 16 == 0); + + bytes = sizeof(*tri) + (3 * input_array_sz); + + tri = lp_scene_alloc_aligned( scene, bytes, 16 ); + + inputs = (char *) (tri + 1); + tri->inputs.a0 = (float (*)[4]) inputs; + tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); + tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + + *tri_size = bytes; + + return tri; +} + + + /** * Do basic setup for triangle rasterization and determine which * framebuffer tiles are touched. Put the triangle in the scene's @@ -264,10 +289,13 @@ do_triangle_ccw(struct setup_context *setup, const int y3 = subpixel_snap(v3[0][1]); struct lp_scene *scene = lp_setup_get_current_scene(setup); - struct lp_rast_triangle *tri = lp_scene_alloc_aligned( scene, sizeof *tri, 16 ); + struct lp_rast_triangle *tri; int area; float oneoverarea; int minx, maxx, miny, maxy; + unsigned tri_bytes; + + tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); tri->dx12 = x1 - x2; tri->dx23 = x2 - x3; @@ -286,7 +314,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject to overflow?? 
*/ if (area <= 0) { - lp_scene_putback_data( scene, sizeof *tri ); + lp_scene_putback_data( scene, tri_bytes ); LP_COUNT(nr_culled_tris); return; } @@ -306,7 +334,7 @@ do_triangle_ccw(struct setup_context *setup, if (miny == maxy || minx == maxx) { - lp_scene_putback_data( scene, sizeof *tri ); + lp_scene_putback_data( scene, tri_bytes ); LP_COUNT(nr_culled_tris); return; } -- cgit v1.2.3 From 1d23954a0848f8dd87b214f3a7ec3ae3c04ab0c1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 19:05:54 -0700 Subject: llvmpipe: s/inline/INLINE/ --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index e5e64c3e5c8..9e59a6602cc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -227,7 +227,7 @@ static void setup_tri_coefficients( struct setup_context *setup, -static inline int subpixel_snap( float a ) +static INLINE int subpixel_snap( float a ) { return util_iround(FIXED_ONE * a - (FIXED_ONE / 2)); } -- cgit v1.2.3 From 3bca8691b51a1ca91572c62139f28b64c558ada2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 19:09:59 -0700 Subject: llvmpipe: use some local vars to index step arrays Saves a few more cycles. 
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index e9d15727a7b..3f76f159df1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -153,16 +153,18 @@ do_block_16( struct lp_rasterizer_task *rast_task, const int eo1 = tri->eo1 * 4; const int eo2 = tri->eo2 * 4; const int eo3 = tri->eo3 * 4; - + const int *step0 = tri->inputs.step[0]; + const int *step1 = tri->inputs.step[1]; + const int *step2 = tri->inputs.step[2]; int i; assert(x % 16 == 0); assert(y % 16 == 0); for (i = 0; i < 16; i++) { - int cx1 = c1 + (tri->inputs.step[0][i] * 4); - int cx2 = c2 + (tri->inputs.step[1][i] * 4); - int cx3 = c3 + (tri->inputs.step[2][i] * 4); + int cx1 = c1 + step0[i] * 4; + int cx2 = c2 + step1[i] * 4; + int cx3 = c3 + step2[i] * 4; if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || -- cgit v1.2.3 From 99f1e32fadbf16c167350af3304b2d68c464452a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jan 2010 13:46:23 -0700 Subject: gallium/util: print dlerror() info upon dlopen() failure --- src/gallium/auxiliary/util/u_dl.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index b42b429d4d7..d8803f77fa0 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ b/src/gallium/auxiliary/util/u_dl.c @@ -26,8 +26,8 @@ * **************************************************************************/ - #include "pipe/p_config.h" +#include "util/u_debug.h" #if defined(PIPE_OS_UNIX) #include <dlfcn.h> @@ -43,7 +43,12 @@ struct util_dl_library * util_dl_open(const char *filename) { #if defined(PIPE_OS_UNIX) - return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL); + struct util_dl_library *lib; + lib = (struct util_dl_library *)dlopen(filename, RTLD_LAZY |
RTLD_GLOBAL); + if (!lib) { + debug_printf("gallium: dlopen() failed: %s\n", dlerror()); + } + return lib; #elif defined(PIPE_OS_WINDOWS) return (struct util_dl_library *)LoadLibraryA(filename); #else -- cgit v1.2.3 From 5460da543608805a3debbb401ccc19442e1cb476 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jan 2010 13:46:43 -0700 Subject: gallium/util: comments for time-related functions --- src/gallium/auxiliary/util/u_time.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index a6189a247bb..29fd1cbc67d 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -74,14 +74,23 @@ struct util_time void util_time_get(struct util_time *t); +/** + * Return t2 = t1 + usecs + */ void util_time_add(const struct util_time *t1, int64_t usecs, struct util_time *t2); +/** + * Return current time in microseconds + */ uint64_t util_time_micros( void ); +/** + * Return difference between times, in microseconds + */ int64_t util_time_diff(const struct util_time *t1, const struct util_time *t2); -- cgit v1.2.3 From e95ad2a2b521514eaec04f9b266ee030ecc639a3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jan 2010 13:49:43 -0700 Subject: llvmpipe: count/report time spent in LLVM compilations --- src/gallium/drivers/llvmpipe/lp_perf.c | 4 ++++ src/gallium/drivers/llvmpipe/lp_perf.h | 4 ++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 20 ++++++++++++++++---- 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c index 2628d51069b..042218b27fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.c +++ b/src/gallium/drivers/llvmpipe/lp_perf.c @@ -82,5 +82,9 @@ lp_print_counters(void) debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); debug_printf("llvmpipe: nr_non_empty_4x4: %9u 
(%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); + + debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles); + debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0); + debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles); } } diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h index 9886088c38e..d982bcc989b 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.h +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -49,6 +49,8 @@ struct lp_counters unsigned nr_partially_covered_16; unsigned nr_empty_4; unsigned nr_non_empty_4; + unsigned nr_llvm_compiles; + int64_t llvm_compile_time; /**< total, in microseconds */ }; @@ -58,8 +60,10 @@ extern struct lp_counters lp_count; /** Increment the named counter (only for debug builds) */ #ifdef DEBUG #define LP_COUNT(counter) lp_count.counter++ +#define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr) #else #define LP_COUNT(counter) +#define LP_COUNT_ADD(counter, incr) (void) incr #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 0053c1b88b7..a7514ee011d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -65,6 +65,7 @@ #include "util/u_memory.h" #include "util/u_format.h" #include "util/u_debug_dump.h" +#include "util/u_time.h" #include "pipe/internal/p_winsys_screen.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" @@ -84,13 +85,14 @@ #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_debug.h" -#include "lp_screen.h" -#include "lp_context.h" #include "lp_buffer.h" +#include "lp_context.h" +#include "lp_debug.h" +#include "lp_perf.h" +#include "lp_screen.h" #include "lp_setup.h" #include "lp_state.h" #include "lp_tex_sample.h" -#include "lp_debug.h" static const unsigned char 
quad_offset_x[4] = {0, 1, 0, 1}; @@ -1108,9 +1110,19 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) variant = variant->next; } - if(!variant) + if (!variant) { + struct util_time t0, t1; + int64_t dt; + util_time_get(&t0); + variant = generate_variant(lp, shader, &key); + util_time_get(&t1); + dt = util_time_diff(&t0, &t1); + LP_COUNT_ADD(llvm_compile_time, dt); + LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */ + } + shader->current = variant; /* TODO: put this in the variant */ -- cgit v1.2.3 From 36a0819ff4ede1af91dcf909106cf20659856384 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jan 2010 17:16:42 -0700 Subject: llvmpipe: added debug option to disable LLVM optimization passes --- src/gallium/drivers/llvmpipe/lp_debug.h | 1 + src/gallium/drivers/llvmpipe/lp_jit.c | 30 +++++++++++++++++------------- src/gallium/drivers/llvmpipe/lp_screen.c | 1 + 3 files changed, 19 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 7e04bd471ef..ee818143610 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -48,6 +48,7 @@ st_print_current(void); #define DEBUG_SHOW_TILES 0x200 #define DEBUG_SHOW_SUBTILES 0x400 #define DEBUG_COUNTERS 0x800 +#define DEBUG_NO_LLVM_OPT 0x1000 #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 429cb973c26..9fad7033db5 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -37,6 +37,7 @@ #include "util/u_memory.h" #include "util/u_cpu_detect.h" +#include "lp_debug.h" #include "lp_screen.h" #include "lp_bld_intr.h" #include "lp_bld_misc.h" @@ -165,20 +166,23 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) screen->pass = LLVMCreateFunctionPassManager(screen->provider); LLVMAddTargetData(screen->target, screen->pass); - /* These are the passes currently listed in 
llvm-c/Transforms/Scalar.h, - * but there are more on SVN. */ - /* TODO: Add more passes */ - LLVMAddConstantPropagationPass(screen->pass); - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. - */ - LLVMAddInstructionCombiningPass(screen->pass); + + if ((LP_DEBUG & DEBUG_NO_LLVM_OPT) == 0) { + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddConstantPropagationPass(screen->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(screen->pass); + } + LLVMAddPromoteMemoryToRegisterPass(screen->pass); + LLVMAddGVNPass(screen->pass); + LLVMAddCFGSimplificationPass(screen->pass); } - LLVMAddPromoteMemoryToRegisterPass(screen->pass); - LLVMAddGVNPass(screen->pass); - LLVMAddCFGSimplificationPass(screen->pass); lp_jit_init_globals(screen); } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 9dd4ea7ef67..a3adc81e9f9 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -55,6 +55,7 @@ static const struct debug_named_value lp_debug_flags[] = { { "show_tiles", DEBUG_SHOW_TILES }, { "show_subtiles", DEBUG_SHOW_SUBTILES }, { "counters", DEBUG_COUNTERS }, + { "nopt", DEBUG_NO_LLVM_OPT }, {NULL, 0} }; #endif -- cgit v1.2.3