diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
20 files changed, 276 insertions, 406 deletions
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index fd6ba1561ea..5583fca38e6 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -1,3 +1,4 @@ +from sys import executable as python_cmd import distutils.version Import('*') @@ -16,7 +17,7 @@ env.CodeGenerate( target = 'lp_tile_soa.c', script = 'lp_tile_soa.py', source = ['#src/gallium/auxiliary/util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' ) # XXX: Our dependency scanner only finds depended modules in relative dirs. diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index e05bbe5011a..99a768afd80 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -258,16 +258,16 @@ lp_build_stencil_op_single(struct lp_build_context *bld, } if (stencil->writemask != stencilMax) { - /* compute res = (res & mask) | (stencilVals & ~mask) */ - LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask); - LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask"); - LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1"); - LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2"); - res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2"); + /* mask &= stencil->writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask); + mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + res = lp_build_select_bitwise(bld, writemask, res, stencilVals); + } + else { + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); } - - /* only the update the vector elements enabled by 'mask' */ - res = lp_build_select(bld, mask, res, stencilVals); return res; } @@ -662,9 +662,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } /* Mix the old and new Z buffer values. - * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] + * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) */ - z_dst = lp_build_select(&bld, zselectmask, z_src, z_dst); + z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 78744da500b..2cf6f38c4b8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -141,7 +141,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, else { dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); - dadxy = LLVMBuildAdd(builder, dadx, dady, ""); + dadxy = LLVMBuildFAdd(builder, dadx, dady, ""); attrib_name(dadx, attrib, chan, ".dadx"); attrib_name(dady, attrib, chan, ".dady"); attrib_name(dadxy, attrib, chan, ".dadxy"); @@ -177,7 +177,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * dadq2 = 2 * dq */ - dadq2 = LLVMBuildAdd(builder, dadq, dadq, ""); + dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); /* * a = a0 + x * dadx + y * dady @@ -193,12 +193,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = a0; if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - a = LLVMBuildAdd(builder, a, - LLVMBuildMul(builder, bld->x, dadx, ""), - ""); - a = LLVMBuildAdd(builder, a, - LLVMBuildMul(builder, bld->y, dady, ""), - ""); + LLVMValueRef tmp; + tmp = LLVMBuildFMul(builder, bld->x, dadx, ""); + a = LLVMBuildFAdd(builder, a, tmp, ""); + tmp = LLVMBuildFMul(builder, bld->y, dady, ""); + a = LLVMBuildFAdd(builder, a, tmp, ""); } } @@ -212,7 +211,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * Compute the attrib values on the upper-left corner of each quad. */ - a = LLVMBuildAdd(builder, a, dadq2, ""); + a = LLVMBuildFAdd(builder, a, dadq2, ""); /* * a *= 1 / w diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 3db4f12ebb6..7543bd7b2b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -46,6 +46,10 @@ #include "lp_query.h" #include "lp_setup.h" + +DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE) + + static void llvmpipe_destroy( struct pipe_context *pipe ) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); @@ -130,7 +134,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) /* FIXME: devise alternative to draw_texture_samplers */ - if (debug_get_bool_option( "LP_NO_RAST", FALSE )) + if (debug_get_option_lp_no_rast()) llvmpipe->no_rast = TRUE; llvmpipe->setup = lp_setup_create( &llvmpipe->pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index b2643ab33cd..50f9091c3ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -77,6 +77,7 @@ struct llvmpipe_context { struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; struct { struct llvmpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; int offset[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 625d0c8a8c9..e73b431cb4d 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -49,20 +49,11 @@ * the drawing to the 'draw' module. */ static void -llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) +llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct llvmpipe_context *lp = llvmpipe_context(pipe); struct draw_context *draw = lp->draw; + void *mapped_indices = NULL; unsigned i; if (lp->dirty) @@ -77,27 +68,25 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = llvmpipe_resource_data(indexBuffer); - draw_set_mapped_element_buffer_range(draw, - indexSize, - indexBias, - minIndex, - maxIndex, - mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, 0, start, - start + count - 1, NULL); + if (info->indexed && lp->index_buffer.buffer) { + char *indices = (char *) llvmpipe_resource_data(lp->index_buffer.buffer); + mapped_indices = (void *) (indices + lp->index_buffer.offset); } + + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + lp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + llvmpipe_prepare_vertex_sampling(lp, lp->num_vertex_sampler_views, lp->vertex_sampler_views); /* draw! */ - draw_arrays_instanced(draw, mode, start, count, - startInstance, instanceCount); + draw_arrays_instanced(draw, info->mode, info->start, info->count, + info->start_instance, info->instance_count); /* * unmap vertex/index buffers @@ -105,7 +94,7 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } llvmpipe_cleanup_vertex_sampling(lp); @@ -119,112 +108,8 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, } -static void -llvmpipe_draw_arrays_instanced(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - llvmpipe_draw_range_elements_instanced(pipe, - NULL, /* no indexBuffer */ - 0, 0, /* indexSize, indexBias */ - 0, ~0, /* minIndex, maxIndex */ - mode, - start, - count, - startInstance, - instanceCount); -} - - -static void -llvmpipe_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - llvmpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, indexBias, - 0, ~0, /* minIndex, maxIndex */ - mode, - start, - count, - startInstance, - instanceCount); -} - - -static void -llvmpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count) -{ - llvmpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, indexBias, - 0, 0xffffffff, /* min, maxIndex */ - mode, start, count, - 0, /* startInstance */ - 1); /* instanceCount */ -} - - -static void -llvmpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, - unsigned start, - unsigned count) -{ - llvmpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, indexBias, - min_index, max_index, - mode, start, count, - 0, /* startInstance */ - 1); /* instanceCount */ -} - - -static void -llvmpipe_draw_arrays(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count) -{ - llvmpipe_draw_range_elements_instanced(pipe, - NULL, /* indexBuffer */ - 0, /* indexSize */ - 0, /* indexBias */ - 0, ~0, /* min, maxIndex */ - mode, start, count, - 0, /* startInstance */ - 1); /* instanceCount */ -} - - void llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) { - llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; - llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; - llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; - llvmpipe->pipe.draw_arrays_instanced = llvmpipe_draw_arrays_instanced; - llvmpipe->pipe.draw_elements_instanced = llvmpipe_draw_elements_instanced; + llvmpipe->pipe.draw_vbo = llvmpipe_draw_vbo; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 654f4ea48eb..3215d0f6525 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -43,6 +43,12 @@ #include "lp_scene.h" +#ifdef DEBUG +int jit_line = 0; +const struct lp_rast_state *jit_state = NULL; +#endif + + /** * Begin rasterizing a scene. * Called once per scene by one thread. @@ -368,14 +374,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task, for (buf = 0; buf < rast->state.nr_cbufs; buf++) { struct pipe_surface *cbuf = scene->fb.cbufs[buf]; - const unsigned face = cbuf->face, level = cbuf->level; + const unsigned face_slice = cbuf->face + cbuf->zslice; + const unsigned level = cbuf->level; struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); if (!task->color_tiles[buf]) continue; llvmpipe_unswizzle_cbuf_tile(lpt, - face, + face_slice, level, task->x, task->y, task->color_tiles[buf]); @@ -418,6 +425,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y); /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, inputs->facing, @@ -428,6 +436,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, depth, 0xffff, &task->vis_counter); + END_JIT_CALL(); } } } @@ -497,6 +506,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, assert(lp_check_alignment(state->jit_context.blend_color, 16)); /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, inputs->facing, @@ -507,6 +517,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, depth, mask, &task->vis_counter); + END_JIT_CALL(); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index eaf2a6f3345..44319a0ad6f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -104,9 +104,6 @@ struct lp_rast_plane { int dcdx; int dcdy; - - /* edge/step info for 3 edges and 4x4 block of pixels */ - const int *step; }; /** @@ -119,8 +116,6 @@ struct lp_rast_triangle { /* inputs for the shader */ struct lp_rast_shader_inputs inputs; - int step[3][16]; - #ifdef DEBUG float v[3][2]; #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index b4a48cfd024..fae7f6d3dc2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -40,6 +40,34 @@ #include "lp_limits.h" +/* If we crash in a jitted function, we can examine jit_line and jit_state + * to get some info. This is not thread-safe, however. + */ +#ifdef DEBUG + +extern int jit_line; +extern const struct lp_rast_state *jit_state; + +#define BEGIN_JIT_CALL(state) \ + do { \ + jit_line = __LINE__; \ + jit_state = state; \ + } while (0) + +#define END_JIT_CALL() \ + do { \ + jit_line = 0; \ + jit_state = NULL; \ + } while (0) + +#else + +#define BEGIN_JIT_CALL(X) +#define END_JIT_CALL() + +#endif + + struct lp_rasterizer; @@ -249,6 +277,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(task, x, y); /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, inputs->facing, @@ -259,6 +288,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth, 0xffff, &task->vis_counter ); + END_JIT_CALL(); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index ebe9a8e92b4..980c18c0240 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,52 +37,6 @@ #include "lp_tile_soa.h" -/** - * Map an index in [0,15] to an x,y position, multiplied by 4. - * This is used to get the position of each subtile in a 4x4 - * grid of edge step values. - * Note: we can use some bit twiddling to compute these values instead - * of using a look-up table, but there's no measurable performance - * difference. - */ -static const int pos_table4[16][2] = { - { 0, 0 }, - { 4, 0 }, - { 0, 4 }, - { 4, 4 }, - { 8, 0 }, - { 12, 0 }, - { 8, 4 }, - { 12, 4 }, - { 0, 8 }, - { 4, 8 }, - { 0, 12 }, - { 4, 12 }, - { 8, 8 }, - { 12, 8 }, - { 8, 12 }, - { 12, 12 } -}; - - -static const int pos_table16[16][2] = { - { 0, 0 }, - { 16, 0 }, - { 0, 16 }, - { 16, 16 }, - { 32, 0 }, - { 48, 0 }, - { 32, 16 }, - { 48, 16 }, - { 0, 32 }, - { 16, 32 }, - { 0, 48 }, - { 16, 48 }, - { 32, 32 }, - { 48, 32 }, - { 32, 48 }, - { 48, 48 } -}; /** @@ -113,6 +67,68 @@ block_full_16(struct lp_rasterizer_task *task, block_full_4(task, tri, x + ix, y + iy); } + +static INLINE unsigned +build_mask(int c, int dcdx, int dcdy) +{ + int mask = 0; + + int c0 = c; + int c1 = c0 + dcdx; + int c2 = c1 + dcdx; + int c3 = c2 + dcdx; + + mask |= ((c0 + 0 * dcdy) >> 31) & (1 << 0); + mask |= ((c0 + 1 * dcdy) >> 31) & (1 << 2); + mask |= ((c0 + 2 * dcdy) >> 31) & (1 << 8); + mask |= ((c0 + 3 * dcdy) >> 31) & (1 << 10); + mask |= ((c1 + 0 * dcdy) >> 31) & (1 << 1); + mask |= ((c1 + 1 * dcdy) >> 31) & (1 << 3); + mask |= ((c1 + 2 * dcdy) >> 31) & (1 << 9); + mask |= ((c1 + 3 * dcdy) >> 31) & (1 << 11); + mask |= ((c2 + 0 * dcdy) >> 31) & (1 << 4); + mask |= ((c2 + 1 * dcdy) >> 31) & (1 << 6); + mask |= ((c2 + 2 * dcdy) >> 31) & (1 << 12); + mask |= ((c2 + 3 * dcdy) >> 31) & (1 << 14); + mask |= ((c3 + 0 * dcdy) >> 31) & (1 << 5); + mask |= ((c3 + 1 * dcdy) >> 31) & (1 << 7); + mask |= ((c3 + 2 * dcdy) >> 31) & (1 << 13); + mask |= ((c3 + 3 * dcdy) >> 31) & (1 << 15); + + return mask; +} + +static INLINE unsigned +build_mask_linear(int c, int dcdx, int dcdy) +{ + int mask = 0; + + int c0 = c; + int c1 = c0 + dcdy; + int c2 = c1 + dcdy; + int c3 = c2 + dcdy; + + mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0); + mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1); + mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2); + mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3); + mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4); + mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5); + mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6); + mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7); + mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8); + mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9); + mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10); + mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11); + mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12); + mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13); + mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14); + mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15); + + return mask; +} + + #define TAG(x) x##_1 #define NR_PLANES 1 #include "lp_rast_tri_tmp.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index a410c611a3f..43f72d8ca8f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -46,19 +46,13 @@ TAG(do_block_4)(struct lp_rasterizer_task *task, int x, int y, const int *c) { - unsigned mask = 0; - int i; + unsigned mask = 0xffff; + int j; - for (i = 0; i < 16; i++) { - int any_negative = 0; - int j; - - for (j = 0; j < NR_PLANES; j++) - any_negative |= (c[j] - 1 + plane[j].step[i]); - - any_negative >>= 31; - - mask |= (~any_negative) & (1 << i); + for (j = 0; j < NR_PLANES; j++) { + mask &= ~build_mask(c[j] - 1, + -plane[j].dcdx, + plane[j].dcdy); } /* Now pass to the shader: @@ -79,24 +73,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int *c) { unsigned outmask, inmask, partmask, partial_mask; - unsigned i, j; + unsigned j; outmask = 0; /* outside one or more trivial reject planes */ partmask = 0; /* outside one or more trivial accept planes */ for (j = 0; j < NR_PLANES; j++) { - const int *step = plane[j].step; - const int eo = plane[j].eo * 4; - const int ei = plane[j].ei * 4; - const int cox = c[j] + eo; - const int cio = ei - 1 - eo; - - for (i = 0; i < 16; i++) { - int out = cox + step[i] * 4; - int part = out + cio; - outmask |= (out >> 31) & (1 << i); - partmask |= (part >> 31) & (1 << i); - } + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; + const int cox = c[j] + plane[j].eo * 4; + const int cio = c[j] + plane[j].ei * 4 - 1; + + outmask |= build_mask_linear(cox, dcdx, dcdy); + partmask |= build_mask_linear(cio, dcdx, dcdy); } if (outmask == 0xffff) @@ -117,15 +106,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, */ while (partial_mask) { int i = ffs(partial_mask) - 1; - int px = x + pos_table4[i][0]; - int py = y + pos_table4[i][1]; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; int cx[NR_PLANES]; - for (j = 0; j < NR_PLANES; j++) - cx[j] = c[j] + plane[j].step[i] * 4; - partial_mask &= ~(1 << i); + for (j = 0; j < NR_PLANES; j++) + cx[j] = (c[j] + - plane[j].dcdx * ix + + plane[j].dcdy * iy); + TAG(do_block_4)(task, tri, plane, px, py, cx); } @@ -133,8 +126,10 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, */ while (inmask) { int i = ffs(inmask) - 1; - int px = x + pos_table4[i][0]; - int py = y + pos_table4[i][1]; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; inmask &= ~(1 << i); @@ -157,35 +152,28 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; unsigned outmask, inmask, partmask, partial_mask; - unsigned i, j, nr_planes = 0; + unsigned j = 0; + + outmask = 0; /* outside one or more trivial reject planes */ + partmask = 0; /* outside one or more trivial accept planes */ while (plane_mask) { int i = ffs(plane_mask) - 1; - plane[nr_planes] = tri->plane[i]; + plane[j] = tri->plane[i]; plane_mask &= ~(1 << i); - nr_planes++; - }; - - assert(nr_planes == NR_PLANES); - outmask = 0; /* outside one or more trivial reject planes */ - partmask = 0; /* outside one or more trivial accept planes */ + c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - for (j = 0; j < NR_PLANES; j++) { - const int *step = plane[j].step; - const int eo = plane[j].eo * 16; - const int ei = plane[j].ei * 16; - int cox, cio; + { + const int dcdx = -plane[j].dcdx * 16; + const int dcdy = plane[j].dcdy * 16; + const int cox = c[j] + plane[j].eo * 16; + const int cio = c[j] + plane[j].ei * 16 - 1; - c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - cox = c[j] + eo; - cio = ei - 1 - eo; - - for (i = 0; i < 16; i++) { - int out = cox + step[i] * 16; - int part = out + cio; - outmask |= (out >> 31) & (1 << i); - partmask |= (part >> 31) & (1 << i); + outmask |= build_mask_linear(cox, dcdx, dcdy); + partmask |= build_mask_linear(cio, dcdx, dcdy); } + + j++; } if (outmask == 0xffff) @@ -206,12 +194,16 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, */ while (partial_mask) { int i = ffs(partial_mask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; + int ix = (i & 3) * 16; + int iy = (i >> 2) * 16; + int px = x + ix; + int py = y + iy; int cx[NR_PLANES]; for (j = 0; j < NR_PLANES; j++) - cx[j] = c[j] + plane[j].step[i] * 16; + cx[j] = (c[j] + - plane[j].dcdx * ix + + plane[j].dcdy * iy); partial_mask &= ~(1 << i); @@ -223,8 +215,10 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, */ while (inmask) { int i = ffs(inmask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; + int ix = (i & 3) * 16; + int iy = (i >> 2) * 16; + int px = x + ix; + int py = y + iy; inmask &= ~(1 << i); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 7e432503c12..393533ebee4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -61,36 +61,6 @@ struct tri_info { -static const int step_scissor_minx[16] = { - 0, 1, 0, 1, - 2, 3, 2, 3, - 0, 1, 0, 1, - 2, 3, 2, 3 -}; - -static const int step_scissor_maxx[16] = { - 0, -1, 0, -1, - -2, -3, -2, -3, - 0, -1, 0, -1, - -2, -3, -2, -3 -}; - -static const int step_scissor_miny[16] = { - 0, 0, 1, 1, - 0, 0, 1, 1, - 2, 2, 3, 3, - 2, 2, 3, 3 -}; - -static const int step_scissor_maxy[16] = { - 0, 0, -1, -1, - 0, 0, -1, -1, - -2, -2, -3, -3, - -2, -2, -3, -3 -}; - - - static INLINE int subpixel_snap(float a) @@ -260,13 +230,13 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, { unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; + unsigned i; /* setup interpolation for all the remaining attributes: */ for (slot = 0; slot < setup->fs.nr_inputs; slot++) { unsigned vert_attr = setup->fs.input[slot].src_index; unsigned usage_mask = setup->fs.input[slot].usage_mask; - unsigned i; switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: @@ -316,6 +286,34 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask); + + if (0) { + for (i = 0; i < NUM_CHANNELS; i++) { + float a0 = tri->inputs.a0 [0][i]; + float dadx = tri->inputs.dadx[0][i]; + float dady = tri->inputs.dady[0][i]; + + debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", + "xyzw"[i], + a0, dadx, dady); + } + + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + float a0 = tri->inputs.a0 [1 + slot][i]; + float dadx = tri->inputs.dadx[1 + slot][i]; + float dady = tri->inputs.dady[1 + slot][i]; + + debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", + slot, + "xyzw"[i], + a0, dadx, dady); + } + } + } + } } @@ -525,7 +523,7 @@ do_triangle_ccw(struct lp_setup_context *setup, info.dx20 = info.v2[0][0] - info.v0[0][0]; info.dy01 = info.v0[0][1] - info.v1[0][1]; info.dy20 = info.v2[0][1] - info.v0[0][1]; - info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01); + info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01); info.frontfacing = frontfacing; /* Setup parameter interpolants: @@ -590,35 +588,6 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Calculate trivial accept offsets from the above. */ plane->ei = plane->dcdy - plane->dcdx - plane->eo; - - plane->step = tri->step[i]; - - /* Fill in the inputs.step[][] arrays. - * We've manually unrolled some loops here. - */ -#define SETUP_STEP(j, x, y) \ - tri->step[i][j] = y * plane->dcdy - x * plane->dcdx - - SETUP_STEP(0, 0, 0); - SETUP_STEP(1, 1, 0); - SETUP_STEP(2, 0, 1); - SETUP_STEP(3, 1, 1); - - SETUP_STEP(4, 2, 0); - SETUP_STEP(5, 3, 0); - SETUP_STEP(6, 2, 1); - SETUP_STEP(7, 3, 1); - - SETUP_STEP(8, 0, 2); - SETUP_STEP(9, 1, 2); - SETUP_STEP(10, 0, 3); - SETUP_STEP(11, 1, 3); - - SETUP_STEP(12, 2, 2); - SETUP_STEP(13, 3, 2); - SETUP_STEP(14, 2, 3); - SETUP_STEP(15, 3, 3); -#undef STEP } @@ -641,28 +610,24 @@ do_triangle_ccw(struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 7) { - tri->plane[3].step = step_scissor_minx; tri->plane[3].dcdx = -1; tri->plane[3].dcdy = 0; tri->plane[3].c = 1-minx; tri->plane[3].ei = 0; tri->plane[3].eo = 1; - tri->plane[4].step = step_scissor_maxx; tri->plane[4].dcdx = 1; tri->plane[4].dcdy = 0; tri->plane[4].c = maxx; tri->plane[4].ei = -1; tri->plane[4].eo = 0; - tri->plane[5].step = step_scissor_miny; tri->plane[5].dcdx = 0; tri->plane[5].dcdy = 1; tri->plane[5].c = 1-miny; tri->plane[5].ei = 0; tri->plane[5].eo = 1; - tri->plane[6].step = step_scissor_maxy; tri->plane[6].dcdx = 0; tri->plane[6].dcdy = -1; tri->plane[6].c = maxy; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 5953d690a41..dbca49a2efa 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -75,6 +75,7 @@ #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_tgsi.h" @@ -676,6 +677,11 @@ generate_fragment(struct llvmpipe_context *lp, color_ptr); } +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -710,6 +716,7 @@ generate_fragment(struct llvmpipe_context *lp, if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); } + lp_func_delete_body(function); } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 113f13db018..d86e66b4fb8 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -89,6 +89,19 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, } +static void +llvmpipe_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if (ib) + memcpy(&llvmpipe->index_buffer, ib, sizeof(llvmpipe->index_buffer)); + else + memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer)); + + /* TODO make this more like a state */ +} void llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) @@ -98,4 +111,5 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; + llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 0c955556551..d0389f0cb0b 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -37,6 +37,7 @@ */ +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_debug.h" #include "lp_bld_blend.h" @@ -485,8 +486,7 @@ test_one(unsigned verbose, { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; blend_test_ptr_t blend_test_ptr; @@ -510,15 +510,6 @@ test_one(unsigned verbose, } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - if(verbose < 1) - dump_blend_type(stderr, blend, mode, type); - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); @@ -735,7 +726,6 @@ test_one(unsigned verbose, LLVMFreeMachineCodeForFunction(engine, func); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index cf41b40581f..3ba42bf11a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -35,6 +35,7 @@ #include "util/u_pointer.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_conv.h" @@ -152,8 +153,7 @@ test_one(unsigned verbose, { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; conv_test_ptr_t conv_test_ptr; @@ -203,15 +203,6 @@ test_one(unsigned verbose, } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - if(verbose < 1) - dump_conv_types(stderr, src_type, dst_type); - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); @@ -351,7 +342,6 @@ test_one(unsigned verbose, LLVMFreeMachineCodeForFunction(engine, func); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index 21df83f9d89..4653f30e39d 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -31,6 +31,8 @@ #include "util/u_pointer.h" #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_assert.h" #include "gallivm/lp_bld_printf.h" #include <llvm-c/Analysis.h> @@ -74,6 +76,10 @@ add_printf_test(LLVMModuleRef module) lp_build_printf(builder, "hello, world\n"); lp_build_printf(builder, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32Type(), 5, 0), LLVMConstInt(LLVMInt32Type(), 6, 0)); + + /* Also test lp_build_assert(). This should not fail. */ + lp_build_assert(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), "assert(1)"); + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); return func; @@ -107,11 +113,16 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) LLVMDisposeMessage(error); provider = LLVMCreateModuleProviderForExistingModule(module); +#if 0 if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); } +#else + (void) provider; + engine = lp_build_engine; +#endif #if 0 pass = LLVMCreatePassManager(); diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index f571a81a4a4..57b0ee57767 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -31,7 +31,7 @@ #include "util/u_pointer.h" #include "gallivm/lp_bld.h" -#include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" #include <llvm-c/Analysis.h> @@ -121,8 +121,7 @@ test_round(unsigned verbose, FILE *fp) { LLVMModuleRef module = NULL; LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; test_round_t round_func, trunc_func, floor_func, ceil_func; @@ -145,13 +144,6 @@ test_round(unsigned verbose, FILE *fp) } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 1366ecddcbc..7ab357f162e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -30,8 +30,9 @@ #include <stdio.h> #include "gallivm/lp_bld.h" -#include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" +#include "util/u_pointer.h" #include <llvm-c/Analysis.h> #include <llvm-c/ExecutionEngine.h> @@ -101,8 +102,7 @@ test_sincos(unsigned verbose, FILE *fp) { LLVMModuleRef module = NULL; LLVMValueRef test_sin = NULL, test_cos = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; test_sincos_t sin_func; @@ -122,13 +122,6 @@ test_sincos(unsigned verbose, FILE *fp) } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); @@ -144,8 +137,8 @@ test_sincos(unsigned verbose, FILE *fp) (void)pass; #endif - sin_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_sin); - cos_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_cos); + sin_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_sin)); + cos_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_cos)); memset(unpacked, 0, sizeof unpacked); @@ -162,7 +155,6 @@ test_sincos(unsigned verbose, FILE *fp) LLVMFreeMachineCodeForFunction(engine, test_sin); LLVMFreeMachineCodeForFunction(engine, test_cos); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index c71ec8066c7..2ba39052aba 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -293,34 +293,7 @@ def generate_ssse3(): print ''' #if defined(PIPE_ARCH_SSE) - -#if defined(PIPE_ARCH_SSSE3) - -#include <tmmintrin.h> - -#else - -#include <emmintrin.h> - -/** - * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases - * where -mssse3 is not supported/enabled. - * - * MSVC will never get in here as its intrinsics support do not rely on - * compiler command line options. - */ -static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_shuffle_epi8(__m128i a, __m128i mask) -{ - __m128i result; - __asm__("pshufb %1, %0" - : "=x" (result) - : "xm" (mask), "0" (a)); - return result; -} - -#endif - +#include "util/u_sse.h" static void lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, |