diff options
Diffstat (limited to 'src/gallium/auxiliary')
38 files changed, 575 insertions, 173 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index aa1b255c0c9..428e097be09 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -107,6 +107,7 @@ C_SOURCES = \ util/u_caps.c \ util/u_cpu_detect.c \ util/u_dl.c \ + util/u_draw.c \ util/u_draw_quad.c \ util/u_format.c \ util/u_format_other.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 84519cfd048..6176f0ae2a8 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -154,6 +154,7 @@ source = [ 'util/u_dump_defines.c', 'util/u_dump_state.c', 'util/u_dl.c', + 'util/u_draw.c', 'util/u_draw_quad.c', 'util/u_format.c', 'util/u_format_other.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 95d96719873..d99f94edc43 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -751,7 +751,7 @@ void draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, - uint32_t last_level, + uint32_t first_level, uint32_t last_level, uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], const void *data[PIPE_MAX_TEXTURE_LEVELS]) @@ -760,7 +760,7 @@ draw_set_mapped_texture(struct draw_context *draw, if(draw->llvm) draw_llvm_set_mapped_texture(draw, sampler_idx, - width, height, depth, last_level, + width, height, depth, first_level, last_level, row_stride, img_stride, data); #endif } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index a0b217e4d33..7db75b71b43 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -123,7 +123,7 @@ void draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, - uint32_t last_level, + uint32_t first_level, uint32_t last_level, uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], const void *data[PIPE_MAX_TEXTURE_LEVELS]); diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index a5217c1d4ec..a1b8fc38880 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -106,6 +106,7 @@ create_jit_texture_type(struct gallivm_state *gallivm) elem_types[DRAW_JIT_TEXTURE_WIDTH] = elem_types[DRAW_JIT_TEXTURE_HEIGHT] = elem_types[DRAW_JIT_TEXTURE_DEPTH] = + elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] = elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type; elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = @@ -136,6 +137,9 @@ create_jit_texture_type(struct gallivm_state *gallivm) LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, target, texture_type, DRAW_JIT_TEXTURE_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level, + target, texture_type, + DRAW_JIT_TEXTURE_FIRST_LEVEL); LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, target, texture_type, DRAW_JIT_TEXTURE_LAST_LEVEL); @@ -438,7 +442,8 @@ generate_vs(struct draw_llvm *llvm, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef system_values_array, LLVMValueRef context_ptr, - struct lp_build_sampler_soa *draw_sampler) + struct lp_build_sampler_soa *draw_sampler, + boolean clamp_vertex_color) { const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; struct lp_type vs_type; @@ -474,6 +479,30 @@ generate_vs(struct draw_llvm *llvm, outputs, sampler, &llvm->draw->vs.vertex_shader->info); + + if(clamp_vertex_color) + { + LLVMValueRef out; + unsigned chan, attrib; + struct lp_build_context bld; + struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info; + lp_build_context_init(&bld, llvm->gallivm, vs_type); + + for (attrib = 0; attrib < info->num_outputs; ++attrib) { + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + switch (info->output_semantic_name[attrib]) { + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_BCOLOR: + out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + out = lp_build_clamp(&bld, out, bld.zero, bld.one); + LLVMBuildStore(builder, out, outputs[attrib][chan]); + break; + } + } + } + } + } } #if DEBUG_STORE @@ -1235,7 +1264,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) ptr_aos, system_values_array, context_ptr, - sampler); + sampler, + variant->key.clamp_vertex_color); /* store original positions in clip before further manipulation */ store_clip(gallivm, io, outputs); @@ -1446,7 +1476,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian ptr_aos, system_values_array, context_ptr, - sampler); + sampler, + variant->key.clamp_vertex_color); /* store original positions in clip before further manipulation */ store_clip(gallivm, io, outputs); @@ -1524,6 +1555,8 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) key = (struct draw_llvm_variant_key *)store; + key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/ + /* Presumably all variants of the shader should have the same * number of vertex elements - ie the number of shader inputs. */ @@ -1566,7 +1599,7 @@ void draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, - uint32_t last_level, + uint32_t first_level, uint32_t last_level, uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], const void *data[PIPE_MAX_TEXTURE_LEVELS]) @@ -1582,9 +1615,10 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, jit_tex->width = width; jit_tex->height = height; jit_tex->depth = depth; + jit_tex->first_level = first_level; jit_tex->last_level = last_level; - for (j = 0; j <= last_level; j++) { + for (j = first_level; j <= last_level; j++) { jit_tex->data[j] = data[j]; jit_tex->row_stride[j] = row_stride[j]; jit_tex->img_stride[j] = img_stride[j]; diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index e8623e7bcdc..375b7b8b571 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -50,6 +50,7 @@ struct draw_jit_texture uint32_t width; uint32_t height; uint32_t depth; + uint32_t first_level; uint32_t last_level; uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; @@ -64,6 +65,7 @@ enum { DRAW_JIT_TEXTURE_WIDTH = 0, DRAW_JIT_TEXTURE_HEIGHT, DRAW_JIT_TEXTURE_DEPTH, + DRAW_JIT_TEXTURE_FIRST_LEVEL, DRAW_JIT_TEXTURE_LAST_LEVEL, DRAW_JIT_TEXTURE_ROW_STRIDE, DRAW_JIT_TEXTURE_IMG_STRIDE, @@ -162,6 +164,7 @@ struct draw_llvm_variant_key { unsigned nr_vertex_elements:8; unsigned nr_samplers:8; + unsigned clamp_vertex_color:1; unsigned clip_xy:1; unsigned clip_z:1; unsigned clip_user:1; @@ -169,7 +172,7 @@ struct draw_llvm_variant_key unsigned bypass_viewport:1; unsigned need_edgeflags:1; unsigned nr_planes:4; - unsigned pad:6; + unsigned pad:5; /* Variable number of vertex elements: */ @@ -295,7 +298,7 @@ void draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, - uint32_t last_level, + uint32_t first_level, uint32_t last_level, uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], const void *data[PIPE_MAX_TEXTURE_LEVELS]); diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c index 574c7cc452f..8af34617353 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c @@ -144,6 +144,7 @@ draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base, DRAW_LLVM_TEXTURE_MEMBER(width, DRAW_JIT_TEXTURE_WIDTH, TRUE) DRAW_LLVM_TEXTURE_MEMBER(height, DRAW_JIT_TEXTURE_HEIGHT, TRUE) DRAW_LLVM_TEXTURE_MEMBER(depth, DRAW_JIT_TEXTURE_DEPTH, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(first_level,DRAW_JIT_TEXTURE_FIRST_LEVEL, TRUE) DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE) DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE) DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE) @@ -209,6 +210,7 @@ draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, sampler->dynamic_state.base.width = draw_llvm_texture_width; sampler->dynamic_state.base.height = draw_llvm_texture_height; sampler->dynamic_state.base.depth = draw_llvm_texture_depth; + sampler->dynamic_state.base.first_level = draw_llvm_texture_first_level; sampler->dynamic_state.base.last_level = draw_llvm_texture_last_level; sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride; sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 58c5858734a..61ae4665539 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -341,6 +341,16 @@ vbuf_flush_vertices( struct vbuf_stage *vbuf ) vbuf->max_vertices = vbuf->nr_vertices = 0; vbuf->vertex_ptr = vbuf->vertices = NULL; } + + /* Reset point/line/tri function pointers. + * If (for example) we transition from points to tris and back to points + * again, we need to call the vbuf_first_point() function again to flush + * the triangles before drawing more points. This can happen when drawing + * with front polygon mode = filled and back polygon mode = line or point. + */ + vbuf->stage.point = vbuf_first_point; + vbuf->stage.line = vbuf_first_line; + vbuf->stage.tri = vbuf_first_tri; } @@ -378,10 +388,6 @@ vbuf_flush( struct draw_stage *stage, unsigned flags ) struct vbuf_stage *vbuf = vbuf_stage( stage ); vbuf_flush_vertices( vbuf ); - - stage->point = vbuf_first_point; - stage->line = vbuf_first_line; - stage->tri = vbuf_first_tri; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index db2e3c5410d..b7d693f8584 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -146,6 +146,14 @@ struct draw_context struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; unsigned nr_vertex_buffers; + /* + * This is the largest legal index value for the current set of + * bound vertex buffers. Regardless of any other consideration, + * all vertex lookups need to be clamped to 0..max_index to + * prevent out-of-bound access. + */ + unsigned max_index; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned nr_vertex_elements; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index c3d7e871f7a..e0eda67c1a2 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -470,6 +470,17 @@ draw_vbo(struct draw_context *draw, if (0) draw_print_arrays(draw, info->mode, info->start, MIN2(info->count, 20)); + draw->pt.max_index = util_draw_max_index(draw->pt.vertex_buffer, + draw->pt.nr_vertex_buffers, + draw->pt.vertex_element, + draw->pt.nr_vertex_elements, + info); + + /* + * TODO: We could use draw->pt.max_index to further narrow + * the min_index/max_index hints given by the state tracker. + */ + for (instance = 0; instance < info->instance_count; instance++) { draw->instance_id = instance + info->start_instance; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 4fa3b265e10..5589a8212e4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -139,7 +139,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.user.max_index); + draw->pt.max_index); } translate->run_elts( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 51043102a61..0ab11d015c6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -186,7 +186,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.user.max_index); + draw->pt.max_index); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 1e926fb028e..0dbbfe2b73c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -169,7 +169,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, ((const ubyte *) draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.user.max_index ); + draw->pt.max_index ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c index a6875253094..c19dcd9e1f7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c @@ -85,7 +85,12 @@ vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags) static INLINE void vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch) { - unsigned hash = fetch % MAP_SIZE; + struct draw_context *draw = vsplit->draw; + unsigned hash; + + fetch = MIN2(fetch, draw->pt.max_index); + + hash = fetch % MAP_SIZE; if (vsplit->cache.fetches[hash] != fetch) { /* update cache */ diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h index 75dba8c39a5..bb3cbf80ad7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h @@ -47,14 +47,18 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, const ushort *draw_elts = NULL; unsigned i; + ib += istart; + /* use the ib directly */ if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) { if (icount > vsplit->max_vertices) return FALSE; for (i = 0; i < icount; i++) { - ELT_TYPE idx = ib[istart + i]; - assert(idx >= min_index && idx <= max_index); + ELT_TYPE idx = ib[i]; + if (idx < min_index || idx > max_index) { + debug_printf("warning: index out of range\n"); + } } draw_elts = (const ushort *) ib; } @@ -83,17 +87,21 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, if (!draw_elts) { if (min_index == 0) { for (i = 0; i < icount; i++) { - ELT_TYPE idx = ib[istart + i]; + ELT_TYPE idx = ib[i]; - assert(idx >= min_index && idx <= max_index); + if (idx < min_index || idx > max_index) { + debug_printf("warning: index out of range\n"); + } vsplit->draw_elts[i] = (ushort) idx; } } else { for (i = 0; i < icount; i++) { - ELT_TYPE idx = ib[istart + i]; + ELT_TYPE idx = ib[i]; - assert(idx >= min_index && idx <= max_index); + if (idx < min_index || idx > max_index) { + debug_printf("warning: index out of range\n"); + } vsplit->draw_elts[i] = (ushort) (idx - min_index); } } diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 7caad6f0053..1763dbc199f 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -104,11 +104,18 @@ draw_create_vertex_shader(struct draw_context *draw, } if (!draw->pt.middle.llvm) { +#if 0 +/* these paths don't support vertex clamping + * TODO: either add it, or remove them completely + * use LLVM instead if you want performance + * use exec instead if you want debugging/more correctness + */ #if defined(PIPE_ARCH_X86) vs = draw_create_vs_sse( draw, shader ); #elif defined(PIPE_ARCH_PPC) vs = draw_create_vs_ppc( draw, shader ); #endif +#endif } #if HAVE_LLVM else { diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index c41d7c42a01..d9c4209a42b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -95,6 +95,7 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, struct tgsi_exec_machine *machine = evs->machine; unsigned int i, j; unsigned slot; + boolean clamp_vertex_color = shader->draw->rasterizer->clamp_vertex_color; tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, constants, const_size); @@ -151,11 +152,22 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, */ for (j = 0; j < max_vertices; j++) { for (slot = 0; slot < shader->info.num_outputs; slot++) { - output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - + unsigned name = shader->info.output_semantic_name[slot]; + if(clamp_vertex_color && + (name == TGSI_SEMANTIC_COLOR || name == TGSI_SEMANTIC_BCOLOR)) + { + output[slot][0] = CLAMP(machine->Outputs[slot].xyzw[0].f[j], 0.0f, 1.0f); + output[slot][1] = CLAMP(machine->Outputs[slot].xyzw[1].f[j], 0.0f, 1.0f); + output[slot][2] = CLAMP(machine->Outputs[slot].xyzw[2].f[j], 0.0f, 1.0f); + output[slot][3] = CLAMP(machine->Outputs[slot].xyzw[3].f[j], 0.0f, 1.0f); + } + else + { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } } #if 0 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index bb2c82fe0ed..0b724a34948 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -32,7 +32,12 @@ #include <llvm/Target/TargetInstrInfo.h> #include <llvm/Support/raw_ostream.h> #include <llvm/Support/MemoryObject.h> + +#if HAVE_LLVM >= 0x0209 +#include <llvm/Support/Host.h> +#else #include <llvm/System/Host.h> +#endif #if HAVE_LLVM >= 0x0207 #include <llvm/MC/MCDisassembler.h> @@ -202,9 +207,17 @@ lp_disassemble(const void* func) } raw_debug_ostream Out; + TargetMachine *TM = T->createTargetMachine(Triple, ""); +#if HAVE_LLVM >= 0x0300 + unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); +#else int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); -#if HAVE_LLVM >= 0x0208 +#endif +#if HAVE_LLVM >= 0x0300 + OwningPtr<MCInstPrinter> Printer( + T->createMCInstPrinter(*TM, AsmPrinterVariant, *AsmInfo)); +#elif HAVE_LLVM >= 0x0208 OwningPtr<MCInstPrinter> Printer( T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo)); #else @@ -216,8 +229,6 @@ lp_disassemble(const void* func) return; } - TargetMachine *TM = T->createTargetMachine(Triple, ""); - const TargetInstrInfo *TII = TM->getInstrInfo(); /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 8ad34598a92..4636371a0f5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -51,6 +51,10 @@ */ #define BRILINEAR_FACTOR 2 +static LLVMValueRef +lp_build_minify(struct lp_build_context *bld, + LLVMValueRef base_size, + LLVMValueRef level); /** * Does the given texture wrap mode allow sampling the texture border color? @@ -184,9 +188,11 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, */ static LLVMValueRef lp_build_rho(struct lp_build_sample_context *bld, + unsigned unit, const LLVMValueRef ddx[4], const LLVMValueRef ddy[4]) { + struct lp_build_context *int_size_bld = &bld->int_size_bld; struct lp_build_context *float_size_bld = &bld->float_size_bld; struct lp_build_context *float_bld = &bld->float_bld; const unsigned dims = bld->dims; @@ -198,8 +204,9 @@ lp_build_rho(struct lp_build_sample_context *bld, LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; LLVMValueRef rho_x, rho_y; LLVMValueRef rho_vec; - LLVMValueRef float_size; + LLVMValueRef int_size, float_size; LLVMValueRef rho; + LLVMValueRef first_level, first_level_vec; dsdx = ddx[0]; dsdy = ddy[0]; @@ -235,7 +242,11 @@ lp_build_rho(struct lp_build_sample_context *bld, rho_vec = lp_build_max(float_size_bld, rho_x, rho_y); - float_size = lp_build_int_to_float(float_size_bld, bld->int_size); + first_level = bld->dynamic_state->first_level(bld->dynamic_state, + bld->gallivm, unit); + first_level_vec = lp_build_broadcast_scalar(&bld->int_size_bld, first_level); + int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec); + float_size = lp_build_int_to_float(float_size_bld, int_size); rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size); @@ -442,7 +453,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, else { LLVMValueRef rho; - rho = lp_build_rho(bld, ddx, ddy); + rho = lp_build_rho(bld, unit, ddx, ddy); /* * Compute lod = log2(rho) @@ -542,18 +553,18 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, LLVMValueRef *level_out) { struct lp_build_context *int_bld = &bld->int_bld; - LLVMValueRef last_level, level; - - LLVMValueRef zero = lp_build_const_int32(bld->gallivm, 0); + LLVMValueRef first_level, last_level, level; + first_level = bld->dynamic_state->first_level(bld->dynamic_state, + bld->gallivm, unit); last_level = bld->dynamic_state->last_level(bld->dynamic_state, bld->gallivm, unit); /* convert float lod to integer */ - level = lod_ipart; + level = lp_build_add(int_bld, lod_ipart, first_level); /* clamp level to legal range of levels */ - *level_out = lp_build_clamp(int_bld, level, zero, last_level); + *level_out = lp_build_clamp(int_bld, level, first_level, last_level); } @@ -573,39 +584,42 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, LLVMBuilderRef builder = bld->gallivm->builder; struct lp_build_context *int_bld = &bld->int_bld; struct lp_build_context *float_bld = &bld->float_bld; - LLVMValueRef last_level; + LLVMValueRef first_level, last_level; LLVMValueRef clamp_min; LLVMValueRef clamp_max; - *level0_out = lod_ipart; - *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one); + first_level = bld->dynamic_state->first_level(bld->dynamic_state, + bld->gallivm, unit); + + *level0_out = lp_build_add(int_bld, lod_ipart, first_level); + *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one); last_level = bld->dynamic_state->last_level(bld->dynamic_state, bld->gallivm, unit); /* - * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the - * minimum number of comparisons, and zeroing lod_fpart in the extreme + * Clamp both *level0_out and *level1_out to [first_level, last_level], with + * the minimum number of comparisons, and zeroing lod_fpart in the extreme * ends in the process. */ - /* lod_ipart < 0 */ + /* *level0_out < first_level */ clamp_min = LLVMBuildICmp(builder, LLVMIntSLT, - lod_ipart, int_bld->zero, - "clamp_lod_to_zero"); + *level0_out, first_level, + "clamp_lod_to_first"); *level0_out = LLVMBuildSelect(builder, clamp_min, - int_bld->zero, *level0_out, ""); + first_level, *level0_out, ""); *level1_out = LLVMBuildSelect(builder, clamp_min, - int_bld->zero, *level1_out, ""); + first_level, *level1_out, ""); *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min, float_bld->zero, *lod_fpart_inout, ""); - /* lod_ipart >= last_level */ + /* *level0_out >= last_level */ clamp_max = LLVMBuildICmp(builder, LLVMIntSGE, - lod_ipart, last_level, + *level0_out, last_level, "clamp_lod_to_last"); *level0_out = LLVMBuildSelect(builder, clamp_max, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 8c9d5df4e43..a71e656fe0e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -120,6 +120,12 @@ struct lp_sampler_dynamic_state struct gallivm_state *gallivm, unsigned unit); + /** Obtain the first mipmap level (base level) (returns int32) */ + LLVMValueRef + (*first_level)( const struct lp_sampler_dynamic_state *state, + struct gallivm_state *gallivm, + unsigned unit); + /** Obtain the number of mipmap levels minus one (returns int32) */ LLVMValueRef (*last_level)( const struct lp_sampler_dynamic_state *state, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index e61cf9541ea..4f160159968 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -939,6 +939,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, LLVMValueRef unswizzled[4]; LLVMValueRef face_ddx[4], face_ddy[4]; struct lp_build_context h16_bld; + LLVMValueRef first_level; LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0); /* we only support the common/simple wrap modes at this time */ @@ -1008,7 +1009,9 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { - ilevel0 = i32t_zero; + first_level = bld->dynamic_state->first_level(bld->dynamic_state, + bld->gallivm, unit); + ilevel0 = first_level; } break; case PIPE_TEX_MIPFILTER_NEAREST: @@ -1065,7 +1068,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE, s, t, r, - i32t_zero, NULL, NULL, + ilevel0, NULL, NULL, packed_lo, packed_hi); } lp_build_endif(&if_ctx); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 9961ba08f3a..4ea7b4bd8a2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -943,6 +943,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef ilevel0, ilevel1 = NULL; LLVMValueRef face_ddx[4], face_ddy[4]; LLVMValueRef texels[4]; + LLVMValueRef first_level; LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0); unsigned chan; @@ -1009,7 +1010,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld, lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { - ilevel0 = i32t_zero; + first_level = bld->dynamic_state->first_level(bld->dynamic_state, + bld->gallivm, unit); + ilevel0 = first_level; } break; case PIPE_TEX_MIPFILTER_NEAREST: @@ -1068,7 +1071,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, lp_build_sample_mipmap(bld, unit, mag_filter, PIPE_TEX_MIPFILTER_NONE, s, t, r, - i32t_zero, NULL, NULL, + ilevel0, NULL, NULL, texels); } lp_build_endif(&if_ctx); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index b03dd3a0cf8..5231bb08374 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -726,6 +726,16 @@ void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src emit_modrm(p, dst, src); } +void x86_cmovcc( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src, + enum x86_cc cc) +{ + DUMP_RRI( dst, src, cc ); + emit_2ub( p, 0x0f, 0x40 + cc ); + emit_modrm( p, dst, src ); +} + void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 2b9678b1765..af0565bb59d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -309,6 +309,7 @@ void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg sr void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc ); void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_dec( struct x86_function *p, struct x86_reg reg ); void x86_inc( struct x86_function *p, struct x86_reg reg ); diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index ad809db720d..e4e1c868333 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -381,14 +381,17 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic * if (tg->attrib[attr].instance_divisor) { index = instance_id / tg->attrib[attr].instance_divisor; + /* XXX we need to clamp the index here too, but to a + * per-array max value, not the draw->pt.max_index value + * that's being given to us via translate->set_buffer(). + */ } else { index = elt; + /* clamp to avoid going out of bounds */ + index = MIN2(index, tg->attrib[attr].max_index); } - /* clamp to void going out of bounds */ - index = MIN2(index, tg->attrib[attr].max_index); - src = tg->attrib[attr].input_ptr + tg->attrib[attr].input_stride * index; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index ef7f4be4c3e..c3d15661f05 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -53,7 +53,7 @@ struct translate_buffer { unsigned max_index; }; -struct translate_buffer_varient { +struct translate_buffer_variant { unsigned buffer_index; unsigned instance_divisor; void *ptr; /* updated either per vertex or per instance */ @@ -103,12 +103,12 @@ struct translate_sse { struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; unsigned nr_buffers; - /* Multiple buffer varients can map to a single buffer. */ - struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS]; - unsigned nr_buffer_varients; + /* Multiple buffer variants can map to a single buffer. */ + struct translate_buffer_variant buffer_variant[PIPE_MAX_ATTRIBS]; + unsigned nr_buffer_variants; - /* Multiple elements can map to a single buffer varient. */ - unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS]; + /* Multiple elements can map to a single buffer variant. */ + unsigned element_to_buffer_variant[PIPE_MAX_ATTRIBS]; boolean use_instancing; unsigned instance_id; @@ -1062,15 +1062,17 @@ static boolean init_inputs( struct translate_sse *p, struct x86_reg instance_id = x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)); - for (i = 0; i < p->nr_buffer_varients; i++) { - struct translate_buffer_varient *varient = &p->buffer_varient[i]; - struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; + for (i = 0; i < p->nr_buffer_variants; i++) { + struct translate_buffer_variant *variant = &p->buffer_variant[i]; + struct translate_buffer *buffer = &p->buffer[variant->buffer_index]; - if (!index_size || varient->instance_divisor) { + if (!index_size || variant->instance_divisor) { + struct x86_reg buf_max_index = x86_make_disp(p->machine_EDI, + get_offset(p, &buffer->max_index)); struct x86_reg buf_stride = x86_make_disp(p->machine_EDI, get_offset(p, &buffer->stride)); struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI, - get_offset(p, &varient->ptr)); + get_offset(p, &variant->ptr)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI, get_offset(p, &buffer->base_ptr)); struct x86_reg elt = p->idx_ESI; @@ -1079,12 +1081,12 @@ static boolean init_inputs( struct translate_sse *p, /* Calculate pointer to first attrib: * base_ptr + stride * index, where index depends on instance divisor */ - if (varient->instance_divisor) { + if (variant->instance_divisor) { /* Our index is instance ID divided by instance divisor. */ x86_mov(p->func, tmp_EAX, instance_id); - if (varient->instance_divisor != 1) { + if (variant->instance_divisor != 1) { struct x86_reg tmp_EDX = p->tmp2_EDX; struct x86_reg tmp_ECX = p->src_ECX; @@ -1093,26 +1095,33 @@ static boolean init_inputs( struct translate_sse *p, */ x86_xor(p->func, tmp_EDX, tmp_EDX); - x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor); + x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor); x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ } + + /* XXX we need to clamp the index here too, but to a + * per-array max value, not the draw->pt.max_index value + * that's being given to us via translate->set_buffer(). + */ } else { x86_mov(p->func, tmp_EAX, elt); - } - /* - * TODO: Respect translate_buffer::max_index. - */ + /* Clamp to max_index + */ + x86_cmp(p->func, tmp_EAX, buf_max_index); + x86_cmovcc(p->func, tmp_EAX, buf_max_index, cc_AE); + } x86_imul(p->func, tmp_EAX, buf_stride); x64_rexw(p->func); x86_add(p->func, tmp_EAX, buf_base_ptr); + x86_cmp(p->func, p->count_EBP, p->tmp_EAX); /* In the linear case, keep the buffer pointer instead of the * index number. */ - if (!index_size && p->nr_buffer_varients == 1) + if (!index_size && p->nr_buffer_variants == 1) { x64_rexw(p->func); x86_mov(p->func, elt, tmp_EAX); @@ -1138,14 +1147,14 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, return x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)); } - if (!index_size && p->nr_buffer_varients == 1) { + if (!index_size && p->nr_buffer_variants == 1) { return p->idx_ESI; } - else if (!index_size || p->buffer_varient[var_idx].instance_divisor) { + else if (!index_size || p->buffer_variant[var_idx].instance_divisor) { struct x86_reg ptr = p->src_ECX; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI, - get_offset(p, &p->buffer_varient[var_idx].ptr)); + get_offset(p, &p->buffer_variant[var_idx].ptr)); x64_rexw(p->func); x86_mov(p->func, ptr, buf_ptr); @@ -1153,15 +1162,19 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, } else { struct x86_reg ptr = p->src_ECX; - const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx]; + const struct translate_buffer_variant *variant = &p->buffer_variant[var_idx]; struct x86_reg buf_stride = x86_make_disp(p->machine_EDI, - get_offset(p, &p->buffer[varient->buffer_index].stride)); + get_offset(p, &p->buffer[variant->buffer_index].stride)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI, - get_offset(p, &p->buffer[varient->buffer_index].base_ptr)); + get_offset(p, &p->buffer[variant->buffer_index].base_ptr)); + + struct x86_reg buf_max_index = + x86_make_disp(p->machine_EDI, + get_offset(p, &p->buffer[variant->buffer_index].max_index)); @@ -1179,6 +1192,12 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, x86_mov(p->func, ptr, elt); break; } + + /* Clamp to max_index + */ + x86_cmp(p->func, ptr, buf_max_index); + x86_cmovcc(p->func, ptr, buf_max_index, cc_AE); + x86_imul(p->func, ptr, buf_stride); x64_rexw(p->func); x86_add(p->func, ptr, buf_base_ptr); @@ -1191,11 +1210,11 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, static boolean incr_inputs( struct translate_sse *p, unsigned index_size ) { - if (!index_size && p->nr_buffer_varients == 1) { + if (!index_size && p->nr_buffer_variants == 1) { struct x86_reg stride = x86_make_disp(p->machine_EDI, get_offset(p, &p->buffer[0].stride)); - if (p->buffer_varient[0].instance_divisor == 0) { + if (p->buffer_variant[0].instance_divisor == 0) { x64_rexw(p->func); x86_add(p->func, p->idx_ESI, stride); sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192)); @@ -1206,14 +1225,14 @@ static boolean incr_inputs( struct translate_sse *p, /* Is this worthwhile?? */ - for (i = 0; i < p->nr_buffer_varients; i++) { - struct translate_buffer_varient *varient = &p->buffer_varient[i]; + for (i = 0; i < p->nr_buffer_variants; i++) { + struct translate_buffer_variant *variant = &p->buffer_variant[i]; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI, - get_offset(p, &varient->ptr)); + get_offset(p, &variant->ptr)); struct x86_reg buf_stride = x86_make_disp(p->machine_EDI, - get_offset(p, &p->buffer[varient->buffer_index].stride)); + get_offset(p, &p->buffer[variant->buffer_index].stride)); - if (varient->instance_divisor == 0) { + if (variant->instance_divisor == 0) { x86_mov(p->func, p->tmp_EAX, buf_stride); x64_rexw(p->func); x86_add(p->func, p->tmp_EAX, buf_ptr); @@ -1323,18 +1342,18 @@ static boolean build_vertex_emit( struct translate_sse *p, label = x86_get_label(p->func); { struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI); - int last_varient = -1; + int last_variant = -1; struct x86_reg vb; for (j = 0; j < p->translate.key.nr_elements; j++) { const struct translate_element *a = &p->translate.key.element[j]; - unsigned varient = p->element_to_buffer_varient[j]; + unsigned variant = p->element_to_buffer_variant[j]; /* Figure out source pointer address: */ - if (varient != last_varient) { - last_varient = varient; - vb = get_buffer_ptr(p, index_size, varient, elt); + if (variant != last_variant) { + last_variant = variant; + vb = get_buffer_ptr(p, index_size, variant, elt); } if (!translate_attr( p, a, @@ -1460,24 +1479,24 @@ struct translate *translate_sse2_create( const struct translate_key *key ) } /* - * Map vertex element to vertex buffer varient. + * Map vertex element to vertex buffer variant. */ - for (j = 0; j < p->nr_buffer_varients; j++) { - if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && - p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { + for (j = 0; j < p->nr_buffer_variants; j++) { + if (p->buffer_variant[j].buffer_index == key->element[i].input_buffer && + p->buffer_variant[j].instance_divisor == key->element[i].instance_divisor) { break; } } - if (j == p->nr_buffer_varients) { - p->buffer_varient[j].buffer_index = key->element[i].input_buffer; - p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; - p->nr_buffer_varients++; + if (j == p->nr_buffer_variants) { + p->buffer_variant[j].buffer_index = key->element[i].input_buffer; + p->buffer_variant[j].instance_divisor = key->element[i].instance_divisor; + p->nr_buffer_variants++; } - p->element_to_buffer_varient[i] = j; + p->element_to_buffer_variant[i] = j; } else { assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); - p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; + p->element_to_buffer_variant[i] = ELEMENT_BUFFER_INSTANCE_ID; } } diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 32519b148b6..e0c8f73c7d3 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -270,13 +270,15 @@ util_cpu_detect(void) util_cpu_caps.x86_cpu_type = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */ /* general feature flags */ - util_cpu_caps.has_tsc = (regs2[3] & (1 << 8 )) >> 8; /* 0x0000010 */ - util_cpu_caps.has_mmx = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ - util_cpu_caps.has_sse = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */ - util_cpu_caps.has_sse2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */ - util_cpu_caps.has_sse3 = (regs2[2] & (1)); /* 0x0000001 */ - util_cpu_caps.has_ssse3 = (regs2[2] & (1 << 9 )) >> 9; /* 0x0000020 */ - util_cpu_caps.has_sse4_1 = (regs2[2] & (1 << 19)) >> 19; + util_cpu_caps.has_tsc = (regs2[3] >> 8) & 1; /* 0x0000010 */ + util_cpu_caps.has_mmx = (regs2[3] >> 23) & 1; /* 0x0800000 */ + util_cpu_caps.has_sse = (regs2[3] >> 25) & 1; /* 0x2000000 */ + util_cpu_caps.has_sse2 = (regs2[3] >> 26) & 1; /* 0x4000000 */ + util_cpu_caps.has_sse3 = (regs2[2] >> 0) & 1; /* 0x0000001 */ + util_cpu_caps.has_ssse3 = (regs2[2] >> 9) & 1; /* 0x0000020 */ + util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1; + util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1; + util_cpu_caps.has_avx = (regs2[2] >> 28) & 1; util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */ cacheline = ((regs2[1] >> 8) & 0xFF) * 8; @@ -290,10 +292,10 @@ util_cpu_detect(void) cpuid(0x80000001, regs2); - util_cpu_caps.has_mmx |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ - util_cpu_caps.has_mmx2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */ - util_cpu_caps.has_3dnow = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */ - util_cpu_caps.has_3dnow_ext = (regs2[3] & (1 << 30 )) >> 30; + util_cpu_caps.has_mmx |= (regs2[3] >> 23) & 1; + util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1; + util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1; + util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1; } if (regs[0] >= 0x80000006) { @@ -329,6 +331,8 @@ util_cpu_detect(void) debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); + debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2); + debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx); debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h index f3bef0993c7..856e8d7a0ef 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.h +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -53,6 +53,8 @@ struct util_cpu_caps { unsigned has_sse3:1; unsigned has_ssse3:1; unsigned has_sse4_1:1; + unsigned has_sse4_2:1; + unsigned has_avx:1; unsigned has_3dnow:1; unsigned has_3dnow_ext:1; unsigned has_altivec:1; diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h index 42adb1f0699..2384c36cb13 100644 --- a/src/gallium/auxiliary/util/u_double_list.h +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -39,6 +39,7 @@ #include <stddef.h> +#include "pipe/p_compiler.h" struct list_head @@ -47,50 +48,56 @@ struct list_head struct list_head *next; }; +static INLINE void list_inithead(struct list_head *item) +{ + item->prev = item; + item->next = item; +} + +static INLINE void list_add(struct list_head *item, struct list_head *list) +{ + item->prev = list; + item->next = list->next; + list->next->prev = item; + list->next = item; +} + +static INLINE void list_addtail(struct list_head *item, struct list_head *list) +{ + item->next = list; + item->prev = list->prev; + list->prev->next = item; + list->prev = item; +} + +static INLINE void list_replace(struct list_head *from, struct list_head *to) +{ + to->prev = from->prev; + to->next = from->next; + from->next->prev = to; + from->prev->next = to; +} + +static INLINE void list_del(struct list_head *item) +{ + item->prev->next = item->next; + item->next->prev = item->prev; +} -#define LIST_INITHEAD(__item) \ - do { \ - (__item)->prev = (__item); \ - (__item)->next = (__item); \ - } while (0) - -#define LIST_ADD(__item, __list) \ - do { \ - (__item)->prev = (__list); \ - (__item)->next = (__list)->next; \ - (__list)->next->prev = (__item); \ - (__list)->next = (__item); \ - } while (0) - -#define LIST_ADDTAIL(__item, __list) \ - do { \ - (__item)->next = (__list); \ - (__item)->prev = (__list)->prev; \ - (__list)->prev->next = (__item); \ - (__list)->prev = (__item); \ - } while(0) - -#define LIST_REPLACE(__from, __to) \ - do { \ - (__to)->prev = (__from)->prev; \ - (__to)->next = (__from)->next; \ - (__from)->next->prev = (__to); \ - (__from)->prev->next = (__to); \ - } while (0) - -#define LIST_DEL(__item) \ - do { \ - (__item)->prev->next = (__item)->next; \ - (__item)->next->prev = (__item)->prev; \ - } while(0) - -#define LIST_DELINIT(__item) \ - do { \ - (__item)->prev->next = (__item)->next; \ - (__item)->next->prev = (__item)->prev; \ - (__item)->next = (__item); \ - (__item)->prev = (__item); \ - } while(0) +static INLINE void list_delinit(struct list_head *item) +{ + item->prev->next = item->next; + item->next->prev = item->prev; + item->next = item; + item->prev = item; +} + +#define LIST_INITHEAD(__item) list_inithead(__item) +#define LIST_ADD(__item, __list) list_add(__item, __list) +#define LIST_ADDTAIL(__item, __list) list_addtail(__item, __list) +#define LIST_REPLACE(__from, __to) list_replace(__from, __to) +#define LIST_DEL(__item) list_del(__item) +#define LIST_DELINIT(__item) list_delinit(__item) #define LIST_ENTRY(__type, __item, __field) \ ((__type *)(((char *)(__item)) - offsetof(__type, __field))) @@ -114,4 +121,21 @@ struct list_head storage = container_of(pos->member.next, pos, member); \ &pos->member != (head); \ pos = storage, storage = container_of(storage->member.next, storage, member)) + +#define LIST_FOR_EACH_ENTRY_SAFE_REV(pos, storage, head, member) \ + for (pos = container_of((head)->prev, pos, member), \ + storage = container_of(pos->member.prev, pos, member); \ + &pos->member != (head); \ + pos = storage, storage = container_of(storage->member.prev, storage, member)) + +#define LIST_FOR_EACH_ENTRY_FROM(pos, start, head, member) \ + for (pos = container_of((start), pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.next, pos, member)) + +#define LIST_FOR_EACH_ENTRY_FROM_REV(pos, start, head, member) \ + for (pos = container_of((start), pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.prev, pos, member)) + #endif /*_U_DOUBLE_LIST_H_*/ diff --git a/src/gallium/auxiliary/util/u_draw.c b/src/gallium/auxiliary/util/u_draw.c new file mode 100644 index 00000000000..20837be5e59 --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw.c @@ -0,0 +1,99 @@ +/************************************************************************** + * + * Copyright 2011 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_debug.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "util/u_draw.h" + + +/** + * Returns the largest legal index value for the current set of bound vertex + * buffers. Regardless of any other consideration, all vertex lookups need to + * be clamped to 0..max_index to prevent an out-of-bound access. + */ +unsigned +util_draw_max_index( + const struct pipe_vertex_buffer *vertex_buffers, + unsigned nr_vertex_buffers, + const struct pipe_vertex_element *vertex_elements, + unsigned nr_vertex_elements, + const struct pipe_draw_info *info) +{ + unsigned max_index; + unsigned i; + + max_index = ~0; + for (i = 0; i < nr_vertex_elements; i++) { + const struct pipe_vertex_element *element = + &vertex_elements[i]; + const struct pipe_vertex_buffer *buffer = + &vertex_buffers[element->vertex_buffer_index]; + unsigned buffer_size; + const struct util_format_description *format_desc; + unsigned format_size; + + assert(buffer->buffer->height0 == 1); + assert(buffer->buffer->depth0 == 1); + buffer_size = buffer->buffer->width0; + + format_desc = util_format_description(element->src_format); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits % 8 == 0); + format_size = format_desc->block.bits/8; + + assert(buffer_size - buffer->buffer_offset <= buffer_size); + buffer_size -= buffer->buffer_offset; + + assert(buffer_size - element->src_offset <= buffer_size); + buffer_size -= element->src_offset; + + assert(buffer_size - format_size <= buffer_size); + buffer_size -= format_size; + + if (buffer->stride != 0) { + unsigned buffer_max_index; + + buffer_max_index = buffer_size / buffer->stride; + + if (element->instance_divisor == 0) { + /* Per-vertex data */ + max_index = MIN2(max_index, buffer_max_index); + } + else { + /* Per-instance data. Simply make sure the state tracker didn't + * request more instances than those that fit in the buffer */ + assert((info->start_instance + info->instance_count)/element->instance_divisor + <= (buffer_max_index + 1)); + } + } + } + + return max_index; +} diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h index f06d09ef91d..fdb683ca8a2 100644 --- a/src/gallium/auxiliary/util/u_draw.h +++ b/src/gallium/auxiliary/util/u_draw.h @@ -34,6 +34,11 @@ #include "pipe/p_state.h" +#ifdef __cplusplus +extern "C" { +#endif + + static INLINE void util_draw_init_info(struct pipe_draw_info *info) { @@ -136,4 +141,18 @@ util_draw_range_elements(struct pipe_context *pipe, pipe->draw_vbo(pipe, &info); } + +unsigned +util_draw_max_index( + const struct pipe_vertex_buffer *vertex_buffers, + unsigned nr_vertex_buffers, + const struct pipe_vertex_element *vertex_elements, + unsigned nr_vertex_elements, + const struct pipe_draw_info *info); + + +#ifdef __cplusplus +} #endif + +#endif /* !U_DRAW_H */ diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index 4896faa12bf..9cbdd0a5b99 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/auxiliary/util/u_format.c @@ -36,6 +36,55 @@ #include "u_memory.h" #include "u_rect.h" #include "u_format.h" +#include "u_format_s3tc.h" + +#include "pipe/p_defines.h" + + +boolean +util_format_is_float(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + + assert(desc); + if (!desc) { + return FALSE; + } + + /* Find the first non-void channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) { + return FALSE; + } + + return desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT ? TRUE : FALSE; +} + + +boolean +util_format_is_supported(enum pipe_format format, unsigned bind) +{ + if (util_format_is_s3tc(format) && !util_format_s3tc_enabled) { + return FALSE; + } + +#ifndef TEXTURE_FLOAT_ENABLED + if ((bind & PIPE_BIND_RENDER_TARGET) && + format != PIPE_FORMAT_R9G9B9E5_FLOAT && + format != PIPE_FORMAT_R11G11B10_FLOAT && + util_format_is_float(format)) { + return FALSE; + } +#endif + + return TRUE; +} void diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 22c63ae162a..e4b3f5df6ee 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -88,6 +88,22 @@ PIPE_FORMAT_L16_UNORM , plain, 1, 1, un16, , , , xxx1, r PIPE_FORMAT_A16_UNORM , plain, 1, 1, un16, , , , 000x, rgb PIPE_FORMAT_I16_UNORM , plain, 1, 1, un16, , , , xxxx, rgb PIPE_FORMAT_L16A16_UNORM , plain, 1, 1, un16, un16, , , xxxy, rgb +PIPE_FORMAT_A8_SNORM , plain, 1, 1, sn8 , , , , 000x, rgb +PIPE_FORMAT_L8_SNORM , plain, 1, 1, sn8 , , , , xxx1, rgb +PIPE_FORMAT_L8A8_SNORM , plain, 1, 1, sn8 , sn8 , , , xxxy, rgb +PIPE_FORMAT_I8_SNORM , plain, 1, 1, sn8 , , , , xxxx, rgb +PIPE_FORMAT_A16_SNORM , plain, 1, 1, sn16, , , , 000x, rgb +PIPE_FORMAT_L16_SNORM , plain, 1, 1, sn16, , , , xxx1, rgb +PIPE_FORMAT_L16A16_SNORM , plain, 1, 1, sn16, sn16, , , xxxy, rgb +PIPE_FORMAT_I16_SNORM , plain, 1, 1, sn16, , , , xxxx, rgb +PIPE_FORMAT_A16_FLOAT , plain, 1, 1, f16 , , , , 000x, rgb +PIPE_FORMAT_L16_FLOAT , plain, 1, 1, f16 , , , , xxx1, rgb +PIPE_FORMAT_L16A16_FLOAT , plain, 1, 1, f16 , f16 , , , xxxy, rgb +PIPE_FORMAT_I16_FLOAT , plain, 1, 1, f16 , , , , xxxx, rgb +PIPE_FORMAT_A32_FLOAT , plain, 1, 1, f32 , , , , 000x, rgb +PIPE_FORMAT_L32_FLOAT , plain, 1, 1, f32 , , , , xxx1, rgb +PIPE_FORMAT_L32A32_FLOAT , plain, 1, 1, f32 , f32 , , , xxxy, rgb +PIPE_FORMAT_I32_FLOAT , plain, 1, 1, f32 , , , , xxxx, rgb # SRGB formats PIPE_FORMAT_L8_SRGB , plain, 1, 1, un8 , , , , xxx1, srgb diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 7659a802a41..bb3ed72e932 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -473,6 +473,10 @@ util_format_colormask(const struct util_format_description *desc) } +boolean +util_format_is_float(enum pipe_format format); + + /** * Whether the src format can be blitted to destation format with a simple * memcpy. @@ -481,6 +485,12 @@ boolean util_is_format_compatible(const struct util_format_description *src_desc, const struct util_format_description *dst_desc); +/** + * Whether the format is supported by Gallium for the given bindings. + * This covers S3TC textures and floating-point render targets. + */ +boolean +util_format_is_supported(enum pipe_format format, unsigned bind); /** * Whether this format is a rgab8 variant. diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 9caf76c802b..4c5cc4da182 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -157,6 +157,9 @@ util_resource_copy_region(struct pipe_context *pipe, unsigned h = src_box->height; assert(src && dst); + assert((src->target == PIPE_BUFFER && dst->target == PIPE_BUFFER) || + (src->target != PIPE_BUFFER && dst->target != PIPE_BUFFER)); + if (!src || !dst) return; @@ -188,15 +191,19 @@ util_resource_copy_region(struct pipe_context *pipe, assert(dst_map); if (src_map && dst_map) { - util_copy_rect(dst_map, - dst_format, - dst_trans->stride, - 0, 0, - w, h, - src_map, - src_trans->stride, - 0, - 0); + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + memcpy(dst_map, src_map, w); + } else { + util_copy_rect(dst_map, + dst_format, + dst_trans->stride, + 0, 0, + w, h, + src_map, + src_trans->stride, + 0, + 0); + } } pipe->transfer_unmap(pipe, src_trans); diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c index 521ac07747c..dea2928f950 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.c +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -499,7 +499,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, /* Update the maximum index. */ mgr->b.max_index = MIN2(mgr->b.max_index, - (vb->buffer->width0 - vb->buffer_offset) / vb->stride); + (vb->buffer->width0 - vb->buffer_offset) / vb->stride - 1); } for (; i < mgr->b.nr_real_vertex_buffers; i++) { @@ -572,7 +572,11 @@ void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb, int min_index, max_index; min_index = info->min_index - info->index_bias; - max_index = info->max_index - info->index_bias; + if (info->max_index == ~0) { + max_index = mgr->b.max_index; + } else { + max_index = MIN2(info->max_index - info->index_bias, mgr->b.max_index); + } /* Translate vertices with non-native layouts or formats. */ if (mgr->incompatible_vb_layout || mgr->ve->incompatible_layout) { diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h index 8b241854c83..9380dce4f72 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.h +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h @@ -53,7 +53,7 @@ struct u_vbuf_mgr { int nr_real_vertex_buffers; /* Precomputed max_index for hardware vertex buffers. */ - int max_index; + unsigned max_index; /* This uploader can optionally be used by the driver. * diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 16b204df676..5027db4314e 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -655,7 +655,7 @@ init_zscan(struct vl_mpeg12_decoder *dec) static bool init_idct(struct vl_mpeg12_decoder *dec) { - struct pipe_sampler_view *matrix, *transpose; + struct pipe_sampler_view *matrix, *transpose = NULL; float matrix_scale, transpose_scale; dec->nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS); |