diff options
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 56 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.h | 3 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_priv.h | 37 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_scene.c | 2 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_scene.h | 4 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup.c | 3 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_texture.c | 31 |
7 files changed, 83 insertions, 53 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 5c837a043e0..be5a286e3da 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -95,6 +95,10 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, task->bin = bin; task->x = x * TILE_SIZE; task->y = y * TILE_SIZE; + task->width = TILE_SIZE + x * TILE_SIZE > task->scene->width_aligned ? + task->scene->width_aligned - x * TILE_SIZE : TILE_SIZE; + task->height = TILE_SIZE + y * TILE_SIZE > task->scene->height_aligned ? + task->scene->height_aligned - y * TILE_SIZE : TILE_SIZE; /* reset pointers to color and depth tile(s) */ memset(task->color_tiles, 0, sizeof(task->color_tiles)); @@ -144,8 +148,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, scene->cbufs[i].stride, task->x, task->y, - TILE_SIZE, - TILE_SIZE, + task->width, + task->height, &uc); } } @@ -172,8 +176,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, scene->cbufs[i].stride, task->x, task->y, - TILE_SIZE, - TILE_SIZE, + task->width, + task->height, &uc); } } @@ -198,8 +202,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, uint64_t clear_mask64 = arg.clear_zstencil.mask; uint32_t clear_value = (uint32_t) clear_value64; uint32_t clear_mask = (uint32_t) clear_mask64; - const unsigned height = TILE_SIZE; - const unsigned width = TILE_SIZE; + const unsigned height = task->height; + const unsigned width = task->width; const unsigned block_size = scene->zsbuf.blocksize; const unsigned dst_stride = scene->zsbuf.stride; uint8_t *dst; @@ -325,8 +329,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, variant = state->variant; /* render the whole 64x64 tile in 4x4 chunks */ - for (y = 0; y < TILE_SIZE; y += 4){ - for (x = 0; x < TILE_SIZE; x += 4) { + for (y = 0; y < task->height; y += 4){ + for (x = 0; x < task->width; x += 4) { uint8_t *color[PIPE_MAX_COLOR_BUFS]; unsigned stride[PIPE_MAX_COLOR_BUFS]; uint8_t *depth = NULL; @@ -434,21 
+438,27 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); - /* run shader on 4x4 block */ - BEGIN_JIT_CALL(state, task); - variant->jit_function[RAST_EDGE_TEST](&state->jit_context, - x, y, - inputs->frontfacing, - GET_A0(inputs), - GET_DADX(inputs), - GET_DADY(inputs), - color, - depth, - mask, - &task->thread_data, - stride, - depth_stride); - END_JIT_CALL(); + /* + * The rasterizer may produce fragments outside our + * allocated 4x4 blocks hence need to filter them out here. + */ + if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { + /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state, task); + variant->jit_function[RAST_EDGE_TEST](&state->jit_context, + x, y, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), + color, + depth, + mask, + &task->thread_data, + stride, + depth_stride); + END_JIT_CALL(); + } } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 8dd3615e78a..9fe89e5b6f1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -50,6 +50,9 @@ struct cmd_bin; #define FIXED_ORDER 4 #define FIXED_ONE (1<<FIXED_ORDER) +/* Rasterizer output size going to jit fs, width/height */ +#define LP_RASTER_BLOCK_SIZE 4 + struct lp_rasterizer_task; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index e4b6e5b301f..4876d7472fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -86,6 +86,7 @@ struct lp_rasterizer_task struct lp_scene *scene; unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ + unsigned width, height; /**< width, height of current tile, in pixels */ uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS]; uint8_t *depth_tile; @@ -293,21 +294,27 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth_stride = 
scene->zsbuf.stride; } - /* run shader on 4x4 block */ - BEGIN_JIT_CALL(state, task); - variant->jit_function[RAST_WHOLE]( &state->jit_context, - x, y, - inputs->frontfacing, - GET_A0(inputs), - GET_DADX(inputs), - GET_DADY(inputs), - color, - depth, - 0xffff, - &task->thread_data, - stride, - depth_stride); - END_JIT_CALL(); + /* + * The rasterizer may produce fragments outside our + * allocated 4x4 blocks hence need to filter them out here. + */ + if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { + /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state, task); + variant->jit_function[RAST_WHOLE]( &state->jit_context, + x, y, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), + color, + depth, + 0xffff, + &task->thread_data, + stride, + depth_stride); + END_JIT_CALL(); + } } void lp_rast_triangle_1( struct lp_rasterizer_task *, diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 771ad085a12..2dfc7ff9ce7 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -505,6 +505,8 @@ void lp_scene_begin_binning( struct lp_scene *scene, scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE; scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE; + scene->width_aligned = align(fb->width, LP_RASTER_BLOCK_SIZE); + scene->height_aligned = align(fb->height, LP_RASTER_BLOCK_SIZE); assert(scene->tiles_x <= TILES_X); assert(scene->tiles_y <= TILES_Y); diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index fa5bbcaf013..bc6c448bc7f 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -144,6 +144,10 @@ struct lp_scene { /** list of resources referenced by the scene commands */ struct resource_ref *resources; + /** aligned scene width, height */ + unsigned width_aligned; + unsigned height_aligned; + /** Total memory used by the scene (in bytes). 
This sums all the * data blocks and counts all bins, state, resource references and * other random allocations within the scene. diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index a141fa337ab..bafcf56b803 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -694,8 +694,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, assert(last_level <= res->last_level); /* - * The complexity here is only necessary for depth textures which - * still are tiled. + * The complexity here should no longer be necessary. */ mip_ptr = llvmpipe_get_texture_image_all(lp_tex, first_level, LP_TEX_USAGE_READ); diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 0ac3528f7ac..56eb4999a1c 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -49,6 +49,7 @@ #include "lp_texture.h" #include "lp_setup.h" #include "lp_state.h" +#include "lp_rast.h" #include "state_tracker/sw_winsys.h" @@ -84,15 +85,15 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, { unsigned alignment, nblocksx, nblocksy, block_size; - /* For non-compressed formats we need to align the texture size - * to the tile size to facilitate render-to-texture. - * XXX this blows up 1d/1d array textures by unreasonable - * amount (factor 64), probably should do something about it. + /* For non-compressed formats we need 4x4 pixel alignment + * (for now). We also want cache line size in x direction, + * otherwise same cache line could end up in multiple threads. + * XXX this blows up 1d/1d array textures by a factor of 4. 
*/ if (util_format_is_compressed(pt->format)) alignment = 1; else - alignment = TILE_SIZE; + alignment = LP_RASTER_BLOCK_SIZE; nblocksx = util_format_get_nblocksx(pt->format, align(width, alignment)); @@ -100,7 +101,10 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, align(height, alignment)); block_size = util_format_get_blocksize(pt->format); - lpr->row_stride[level] = align(nblocksx * block_size, 16); + if (util_format_is_compressed(pt->format)) + lpr->row_stride[level] = nblocksx * block_size; + else + lpr->row_stride[level] = align(nblocksx * block_size, util_cpu_caps.cacheline); /* if row_stride * height > LP_MAX_TEXTURE_SIZE */ if (lpr->row_stride[level] > LP_MAX_TEXTURE_SIZE / nblocksy) { @@ -244,7 +248,12 @@ llvmpipe_resource_create(struct pipe_screen *_screen, assert(templat->height0 == 1); assert(templat->depth0 == 1); assert(templat->last_level == 0); - lpr->data = align_malloc(bytes, 16); + /* + * Reserve some extra storage since if we'd render to a buffer we + * read/write always LP_RASTER_BLOCK_SIZE pixels, but the element + * offset doesn't need to be aligned to LP_RASTER_BLOCK_SIZE. 
+ */ + lpr->data = align_malloc(bytes + (LP_RASTER_BLOCK_SIZE - 1) * 4 * sizeof(float), 16); /* * buffers don't really have stride but it's probably safer * (for code doing same calculations for buffers and textures) @@ -327,7 +336,6 @@ llvmpipe_resource_map(struct pipe_resource *resource, struct llvmpipe_screen *screen = llvmpipe_screen(resource->screen); struct sw_winsys *winsys = screen->winsys; unsigned dt_usage; - uint8_t *map2; if (tex_usage == LP_TEX_USAGE_READ) { dt_usage = PIPE_TRANSFER_READ; @@ -345,14 +353,11 @@ llvmpipe_resource_map(struct pipe_resource *resource, /* install this linear image in texture data structure */ lpr->linear_img.data = map; - /* make sure tiled data gets converted to linear data */ - map2 = llvmpipe_get_texture_image(lpr, 0, 0, tex_usage); - return map2; + return map; } else if (llvmpipe_resource_is_texture(resource)) { - map = llvmpipe_get_texture_image(lpr, layer, level, - tex_usage); + map = llvmpipe_get_texture_image(lpr, layer, level, tex_usage); return map; } else { |