diff options
author | Keith Whitwell <[email protected]> | 2010-07-16 14:40:30 +0100 |
---|---|---|
committer | José Fonseca <[email protected]> | 2010-07-16 17:24:21 +0100 |
commit | 2f6d47a7c8d6e69e5154de44115aab9ba35a41d2 (patch) | |
tree | 8919f6686b6f7db8aa3d3305084076d8884afb15 /src/gallium/drivers/llvmpipe/lp_rast.c | |
parent | b7fff13d58b57870807bae2f43fa2854b551b267 (diff) |
llvmpipe: use single swizzled tile
Use a single swizzled tile per colorbuf (and per thread) to avoid
accumulating large amounts of cached swizzled data.
Now that the SSE3 code has been merged to master, the performance delta
of this change is minimal, the main benefit is reduced memory usage
due to no longer keeping swizzled copies of render targets.
It's clear from the performance of the in-place version of this code
that there is still quite a bit of time being spent swizzling &
unswizzling, but it's not clear exactly how to reduce that.
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_rast.c')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 55 |
1 files changed, 21 insertions, 34 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a023d2b668e..654f4ea48eb 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -67,7 +67,7 @@ lp_rast_begin( struct lp_rasterizer *rast, cbuf->level, cbuf->zslice, LP_TEX_USAGE_READ_WRITE, - LP_TEX_LAYOUT_NONE); + LP_TEX_LAYOUT_LINEAR); } if (fb->zsbuf) { @@ -271,11 +271,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, dst = task->depth_tile; - if (lp_is_dummy_tile(dst)) - return; - - assert(dst == lp_rast_get_depth_block_pointer(task, task->x, task->y)); - switch (block_size) { case 1: memset(dst, (uint8_t) clear_value, height * width); @@ -375,10 +370,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task, struct pipe_surface *cbuf = scene->fb.cbufs[buf]; const unsigned face = cbuf->face, level = cbuf->level; struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); - /* this will convert the tiled data to linear if needed */ - (void) llvmpipe_get_texture_tile_linear(lpt, face, level, - LP_TEX_USAGE_READ, - task->x, task->y); + + if (!task->color_tiles[buf]) + continue; + + llvmpipe_unswizzle_cbuf_tile(lpt, + face, + level, + task->x, task->y, + task->color_tiles[buf]); } } @@ -589,6 +589,11 @@ lp_rast_tile_end(struct lp_rasterizer_task *task) (void) outline_subtiles; #endif + { + union lp_rast_cmd_arg dummy = {0}; + lp_rast_store_linear_color(task, dummy); + } + /* debug */ memset(task->color_tiles, 0, sizeof(task->color_tiles)); task->depth_tile = NULL; @@ -751,30 +756,8 @@ debug_bin( const struct cmd_bin *bin ) static boolean is_empty_bin( const struct cmd_bin *bin ) { - const struct cmd_block *head = bin->commands.head; - int i; - - if (0) - debug_bin(bin); - - /* We emit at most two load-tile commands at the start of the first - * command block. In addition we seem to emit a couple of - * set-state commands even in empty bins. - * - * As a heuristic, if a bin has more than 4 commands, consider it - * non-empty. - */ - if (head->next != NULL || - head->count > 4) { - return FALSE; - } - - for (i = 0; i < head->count; i++) - if (head->cmd[i] != lp_rast_store_linear_color) { - return FALSE; - } - - return TRUE; + if (0) debug_bin(bin); + return bin->commands.head->count == 0; } @@ -984,6 +967,10 @@ lp_rast_create( unsigned num_threads ) /* for synchronizing rasterization threads */ pipe_barrier_init( &rast->barrier, rast->num_threads ); + memset(lp_swizzled_cbuf, 0, sizeof lp_swizzled_cbuf); + + memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); + return rast; } |