summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/llvmpipe/lp_rast_priv.h
diff options
context:
space:
mode:
author: Roland Scheidegger <[email protected]> 2013-05-30 02:05:01 +0200
committer: Roland Scheidegger <[email protected]> 2013-05-31 20:21:05 +0200
commit: 869c5d438f137b2c6b9aec1dddc00bfa64f36621 (patch)
tree: 893e668a17c701d88ef2af0afa6cbfceb1beacb1 /src/gallium/drivers/llvmpipe/lp_rast_priv.h
parent: e881c9a5dc5457f0b096a3c583c5b1450beb89e9 (diff)
llvmpipe: reduce alignment requirement for resources from 64x64 to 4x4
The overallocation was very bad, especially for things like 1d array textures, which got blown up by a factor of 64. (Even ordinary smallish 2d textures benefit a lot from this: a mipmapped 64x64 rgba8 texture previously used 7*16kB = 112kB instead of now ~22kB.)

4x4 is chosen because this is the size the jit functions run on, so making it smaller is going to be a bit more complicated. It is actually not strictly 4x4 pixels: since we'd want to avoid situations where different threads are rendering to the same cacheline, we keep cacheline size alignment in the x direction (often 64 bytes).

To make this work, introduce new task width/height parameters and make sure clears don't clear the whole tile if it's a partial tile. Likewise, the rasterizer may produce fragments outside the 4x4 blocks present in a tile, so don't call the jit function for them.

This does not yet fix rendering to buffers (which cannot have any y alignment at all), and 1d/1d array textures are still overallocated by a factor of 4.

v2: replace magic number 4 with LP_RASTER_BLOCK_SIZE, fix size of buffers allocated (needed in case we render to them).

Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_rast_priv.h')
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 37
1 files changed, 22 insertions, 15 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index e4b6e5b301f..4876d7472fb 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -86,6 +86,7 @@ struct lp_rasterizer_task
struct lp_scene *scene;
unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */
+ unsigned width, height; /**< width, height of current tile, in pixels */
uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
uint8_t *depth_tile;
@@ -293,21 +294,27 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
depth_stride = scene->zsbuf.stride;
}
- /* run shader on 4x4 block */
- BEGIN_JIT_CALL(state, task);
- variant->jit_function[RAST_WHOLE]( &state->jit_context,
- x, y,
- inputs->frontfacing,
- GET_A0(inputs),
- GET_DADX(inputs),
- GET_DADY(inputs),
- color,
- depth,
- 0xffff,
- &task->thread_data,
- stride,
- depth_stride);
- END_JIT_CALL();
+ /*
+ * The rasterizer may produce fragments outside our
+ * allocated 4x4 blocks hence need to filter them out here.
+ */
+ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+ /* run shader on 4x4 block */
+ BEGIN_JIT_CALL(state, task);
+ variant->jit_function[RAST_WHOLE]( &state->jit_context,
+ x, y,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
+ color,
+ depth,
+ 0xffff,
+ &task->thread_data,
+ stride,
+ depth_stride);
+ END_JIT_CALL();
+ }
}
void lp_rast_triangle_1( struct lp_rasterizer_task *,