diff options
author | Roland Scheidegger <[email protected]> | 2010-04-30 15:27:13 +0200 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2010-04-30 15:27:13 +0200 |
commit | 7662e3519bef3802024da3050b886068281e02b1 (patch) | |
tree | bc5bf675e67febffaf9e4d1fadd139e8c253ae32 /src/gallium/auxiliary | |
parent | 9a966b93c03aecac8eb72e6133aa9a2f2c7673f3 (diff) | |
parent | 7c769bef052fc1936d7ab33e291bb4646dc5b0d1 (diff) |
Merge commit 'origin/master' into gallium-msaa
Diffstat (limited to 'src/gallium/auxiliary')
35 files changed, 725 insertions, 362 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index f8e65cf6c61..2daed382cf3 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -101,6 +101,7 @@ C_SOURCES = \ util/u_blit.c \ util/u_blitter.c \ util/u_cache.c \ + util/u_caps.c \ util/u_cpu_detect.c \ util/u_dl.c \ util/u_draw_quad.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index db3a1e73114..a0673df8a8e 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -144,6 +144,7 @@ source = [ 'util/u_blit.c', 'util/u_blitter.c', 'util/u_cache.c', + 'util/u_caps.c', 'util/u_cpu_detect.c', 'util/u_debug.c', 'util/u_debug_memory.c', diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 030683f60f1..55e0ded1b30 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -314,10 +314,13 @@ void cso_release_all( struct cso_context *ctx ) } +/** + * Free the CSO context. NOTE: the state tracker should have previously called + * cso_release_all(). + */ void cso_destroy_context( struct cso_context *ctx ) { if (ctx) { - /*cso_release_all( ctx );*/ FREE( ctx ); } } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 703b46b518b..3b2df054c3c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -24,6 +24,8 @@ /* generates the draw jit function */ static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); static void init_globals(struct draw_llvm *llvm) @@ -218,6 +220,7 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); draw_llvm_generate(llvm, variant); + draw_llvm_generate_elts(llvm, variant); return variant; } @@ -696,6 +699,158 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_disassemble(variant->jit_func); } + +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +{ + LLVMTypeRef arg_types[7]; + LLVMTypeRef func_type; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; + LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + struct draw_context *draw = llvm->draw; + unsigned i, j; + struct lp_build_context bld; + struct lp_build_context bld_int; + struct lp_build_loop_state lp_loop; + struct lp_type vs_type = lp_type_float_vec(32); + const int max_vertices = 4; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; + LLVMValueRef fetch_max; + + arg_types[0] = llvm->context_ptr_type; /* context */ + arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ + arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ + arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ + arg_types[4] = LLVMInt32Type(); /* fetch_count */ + arg_types[5] = LLVMInt32Type(); /* stride */ + arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); + LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(variant->function_elts, 0); + io_ptr = LLVMGetParam(variant->function_elts, 1); + vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); + fetch_elts = LLVMGetParam(variant->function_elts, 3); + fetch_count = LLVMGetParam(variant->function_elts, 4); + stride = LLVMGetParam(variant->function_elts, 5); + vb_ptr = LLVMGetParam(variant->function_elts, 6); + + lp_build_name(context_ptr, "context"); + lp_build_name(io_ptr, "io"); + lp_build_name(vbuffers_ptr, "vbuffers"); + lp_build_name(fetch_elts, "fetch_elts"); + lp_build_name(fetch_count, "fetch_count"); + lp_build_name(stride, "stride"); + lp_build_name(vb_ptr, "vb"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlock(variant->function_elts, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld_int, builder, lp_type_int(32)); + + step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + + fetch_max = LLVMBuildSub(builder, fetch_count, + LLVMConstInt(LLVMInt32Type(), 1, 0), + "fetch_max"); + + lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); + { + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; + LLVMValueRef io; + const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + + io_itr = lp_loop.counter; + io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); +#if DEBUG_STORE + lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + io_itr, io, lp_loop.counter); +#endif + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef true_index = LLVMBuildAdd( + builder, + lp_loop.counter, + LLVMConstInt(LLVMInt32Type(), i, 0), ""); + LLVMValueRef fetch_ptr; + + /* make sure we're not out of bounds which can happen + * if fetch_count % 4 != 0, because on the last iteration + * a few of the 4 vertex fetches will be out of bounds */ + true_index = lp_build_min(&bld_int, true_index, fetch_max); + + fetch_ptr = LLVMBuildGEP(builder, fetch_elts, + &true_index, 1, ""); + true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; + LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), + velem->vertex_buffer_index, + 0); + LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, + &vb_index, 1, ""); + generate_fetch(builder, vbuffers_ptr, + &aos_attribs[j][i], velem, vb, true_index); + } + } + convert_to_soa(builder, aos_attribs, inputs, + draw->pt.nr_vertex_elements); + + ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; + generate_vs(llvm, + builder, + outputs, + ptr_aos, + context_ptr); + + convert_to_aos(builder, io, outputs, + draw->vs.vertex_shader->info.num_outputs, + max_vertices); + } + lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); + + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); + + /* + * Translate the LLVM IR into machine code. + */ +#ifdef DEBUG + if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function_elts); + assert(0); + } +#endif + + LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); + + if (0) { + LLVMDumpValue(variant->function_elts); + debug_printf("\n"); + } + variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal( + llvm->draw->engine, variant->function_elts); + + if (0) + lp_disassemble(variant->jit_func_elts); +} + void draw_llvm_make_variant_key(struct draw_llvm *llvm, struct draw_llvm_variant_key *key) diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 20f4f3da5f3..58fee7f9d60 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -94,6 +94,16 @@ typedef void unsigned stride, struct pipe_vertex_buffer *vertex_buffers); + +typedef void +(*draw_jit_vert_func_elts)(struct draw_jit_context *context, + struct vertex_header *io, + const char *vbuffers[PIPE_MAX_ATTRIBS], + const unsigned *fetch_elts, + unsigned fetch_count, + unsigned stride, + struct pipe_vertex_buffer *vertex_buffers); + struct draw_llvm { struct draw_context *draw; @@ -122,7 +132,9 @@ struct draw_llvm_variant { struct draw_llvm_variant_key key; LLVMValueRef function; + LLVMValueRef function_elts; draw_jit_vert_func jit_func; + draw_jit_vert_func_elts jit_func_elts; struct draw_llvm_variant *next; }; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 1d9395e06c3..b853f3a89f8 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -38,10 +38,10 @@ #include "util/u_prim.h" -DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE); -DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE) #ifdef HAVE_LLVM -DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE); +DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE) #endif static unsigned trim( unsigned count, unsigned first, unsigned incr ) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index f71271bd915..d2a492f2b4c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -167,8 +167,6 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vshader = draw->vs.vertex_shader; - struct draw_geometry_shader *gshader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( fetch_count, 4 ); @@ -182,35 +180,13 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, return; } - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run( fpme->fetch, - fetch_elts, - fetch_count, - (char *)pipeline_verts ); - - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. If there is no shader, eg if - * bypass_vs_clip_and_viewport, then the inputs == outputs, and are - * already in the correct place.*/ - if (opt & PT_SHADE) - { - vshader->run_linear(vshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - if (gshader) - draw_geometry_shader_run(gshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - } + fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + fetch_elts, + fetch_count, + fpme->vertex_size, + draw->pt.vertex_buffer ); if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 0c059573505..c2832eefa2a 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -46,7 +46,7 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" -DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE) void draw_vs_set_constants(struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index efead42335f..6c7e94db433 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -169,8 +169,9 @@ draw_create_vs_ppc(struct draw_context *draw, struct draw_vs_varient_key; struct draw_vertex_shader; -struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_varient * +draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); @@ -188,8 +189,9 @@ struct translate *draw_vs_get_fetch( struct draw_context *draw, struct translate *draw_vs_get_emit( struct draw_context *draw, struct translate_key *key ); -struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_varient * +draw_vs_create_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index e7121f36541..19f49e34c8b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2089,13 +2089,21 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, } +/** cast wrapper */ +static INLINE struct draw_vs_varient_aos_sse * +draw_vs_varient_aos_sse(struct draw_vs_varient *varient) +{ + return (struct draw_vs_varient_aos_sse *) varient; +} + static void vaos_set_buffer( struct draw_vs_varient *varient, unsigned buf, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_stride) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); if (buf < vaos->nr_vb) { vaos->buffer[buf].base_ptr = (char *)ptr; @@ -2112,7 +2120,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2136,7 +2144,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2165,7 +2173,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, static void vaos_destroy( struct draw_vs_varient *varient ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); FREE( vaos->buffer ); @@ -2241,13 +2249,14 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, } -struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_varient * +draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) { struct draw_vs_varient *varient = varient_aos_sse( vs, key ); if (varient == NULL) { - varient = draw_vs_varient_generic( vs, key ); + varient = draw_vs_create_varient_generic( vs, key ); } return varient; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 7deca2b69d9..bc34d390dae 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -203,7 +203,7 @@ draw_create_vs_exec(struct draw_context *draw, vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->machine = draw->vs.machine; return &vs->base; diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index d869eecec5e..5df84916c51 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, */ shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4]) shader->base.immediates, - (const float (*)[4])constants[0], + (float (*)[4])constants[0], ppc_builtin_constants); /* convert (up to) four output verts from SoA back to AoS format */ @@ -190,7 +190,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.create_varient = draw_vs_varient_aos_ppc; else #endif - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->base.prepare = vs_ppc_prepare; vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 54e6423388f..14c95082a9d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -165,9 +165,9 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.draw = draw; if (1) - vs->base.create_varient = draw_vs_varient_aos_sse; + vs->base.create_varient = draw_vs_create_varient_aos_sse; else - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 0abd827b615..6eb26927f27 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -263,8 +263,9 @@ static void vsvg_destroy( struct draw_vs_varient *varient ) } -struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_varient * +draw_vs_create_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) { unsigned i; struct translate_key fetch, emit; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 5f9d4c62d1c..20ae958714b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1210,6 +1210,14 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { +#ifdef PIPE_OS_WINDOWS + /* + * FIXME: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf() + * which is neither efficient nor does the CRT linkage work on Windows + * causing segmentation fault. So simply disable the code for now. + */ + return bld->one; +#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1220,6 +1228,7 @@ lp_build_cos(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +#endif } @@ -1230,6 +1239,14 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { +#ifdef PIPE_OS_WINDOWS + /* + * FIXME: X86 backend translates llvm.sin.v4f32 to 4 calls to CRT's sinf() + * which is neither efficient nor does the CRT linkage work on Windows + * causing segmentation fault. So simply disable the code for now. + */ + return bld->zero; +#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1240,6 +1257,7 @@ lp_build_sin(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +#endif } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index e60ab4f6ba1..8f15b1d287d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -792,3 +792,78 @@ lp_build_endif(struct lp_build_if_state *ctx) /* Resume building code at end of the ifthen->merge_block */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); } + + +/** + * Allocate a scalar (or vector) variable. + * + * Although not strictly part of control flow, control flow has deep impact in + * how variables should be allocated. + * + * The mem2reg optimization pass is the recommended way to dealing with mutable + * variables, and SSA. It looks for allocas and if it can handle them, it + * promotes them, but only looks for alloca instructions in the entry block of + * the function. Being in the entry block guarantees that the alloca is only + * executed once, which makes analysis simpler. + * + * See also: + * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory + */ +LLVMValueRef +lp_build_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + const char *name) +{ + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMValueRef res; + + LLVMPositionBuilderAtEnd(first_builder, first_block); + LLVMPositionBuilderBefore(first_builder, first_instr); + + res = LLVMBuildAlloca(first_builder, type, name); + + LLVMDisposeBuilder(first_builder); + + return res; +} + + +/** + * Allocate an array of scalars/vectors. + * + * mem2reg pass is not capable of promoting structs or arrays to registers, but + * we still put it in the first block anyway as failure to put allocas in the + * first block may prevent the X86 backend from successfully align the stack as + * required. + * + * Also the scalarrepl pass is supossedly more powerful and can promote + * arrays in many cases. + * + * See also: + * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory + */ +LLVMValueRef +lp_build_array_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + LLVMValueRef count, + const char *name) +{ + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMValueRef res; + + LLVMPositionBuilderBefore(first_builder, first_instr); + + res = LLVMBuildArrayAlloca(first_builder, type, count, name); + + LLVMDisposeBuilder(first_builder); + + return res; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 745838570c8..fffb493a93b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -156,5 +156,15 @@ lp_build_endif(struct lp_build_if_state *ctx); LLVMBasicBlockRef lp_build_insert_new_block(LLVMBuilderRef builder, const char *name); +LLVMValueRef +lp_build_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + const char *name); + +LLVMValueRef +lp_build_array_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + LLVMValueRef count, + const char *name); #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index a5a019fa92a..6257e9a4047 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -40,6 +40,7 @@ #include "lp_bld_init.h" #include "lp_bld_type.h" +#include "lp_bld_flow.h" #include "lp_bld_format.h" @@ -370,11 +371,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); } - /* - * XXX: this should better go to the first block in the function - */ - - tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); + tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index a3b69701162..d13fa1a5d04 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -472,18 +472,6 @@ lp_build_select_aos(struct lp_build_context *bld, } } -LLVMValueRef -lp_build_alloca(struct lp_build_context *bld) -{ - const struct lp_type type = bld->type; - - if (type.length > 1) { /*vector*/ - return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), ""); - } else { /*scalar*/ - return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); - } -} - /** Return (a & ~b) */ LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 00a8c750196..29f9fc3b205 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -76,9 +76,6 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef b, const boolean cond[4]); -LLVMValueRef -lp_build_alloca(struct lp_build_context *bld); - LLVMValueRef lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 54ef921678d..74dc9e1d812 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1830,6 +1830,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef unswizzled[4]; LLVMValueRef stride; + assert(bld->static_state->target == PIPE_TEXTURE_2D); + assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR); + assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); + assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE); + lp_build_context_init(&i32, builder, lp_type_int_vec(32)); lp_build_context_init(&h16, builder, lp_type_ufixed(16)); lp_build_context_init(&u8n, builder, lp_type_unorm(8)); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index fac90c60064..d3c769e28b8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -744,22 +744,11 @@ emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) { + LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); + unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx, i; - LLVMBasicBlockRef current_block = - LLVMGetInsertBlock(bld->base.builder); - LLVMBasicBlockRef first_block = - LLVMGetEntryBasicBlock( - LLVMGetBasicBlockParent(current_block)); - LLVMValueRef first_inst = - LLVMGetFirstInstruction(first_block); - - /* we want alloca's to be the first instruction - * in the function so we need to rewind the builder - * to the very beginning */ - LLVMPositionBuilderBefore(bld->base.builder, - first_inst); for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { @@ -767,23 +756,25 @@ emit_declaration( if (bld->has_indirect_addressing) { LLVMValueRef val = LLVMConstInt(LLVMInt32Type(), last*4 + 4, 0); - bld->temps_array = LLVMBuildArrayAlloca(bld->base.builder, - lp_build_vec_type(bld->base.type), - val, ""); + bld->temps_array = lp_build_array_alloca(bld->base.builder, + vec_type, val, ""); } else { for (i = 0; i < NUM_CHANNELS; i++) - bld->temps[idx][i] = lp_build_alloca(&bld->base); + bld->temps[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); } break; case TGSI_FILE_OUTPUT: for (i = 0; i < NUM_CHANNELS; i++) - bld->outputs[idx][i] = lp_build_alloca(&bld->base); + bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); break; case TGSI_FILE_ADDRESS: for (i = 0; i < NUM_CHANNELS; i++) - bld->addr[idx][i] = lp_build_alloca(&bld->base); + bld->addr[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); break; default: @@ -792,8 +783,6 @@ emit_declaration( } } - LLVMPositionBuilderAtEnd(bld->base.builder, - current_block); return TRUE; } @@ -1600,18 +1589,10 @@ emit_instruction( lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; - case TGSI_OPCODE_BGNFOR: - /* fall through */ case TGSI_OPCODE_BGNLOOP: lp_exec_bgnloop(&bld->exec_mask); break; - case TGSI_OPCODE_REP: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_ELSE: lp_exec_mask_cond_invert(&bld->exec_mask); break; @@ -1620,18 +1601,10 @@ emit_instruction( lp_exec_mask_cond_pop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDFOR: - /* fall-through */ case TGSI_OPCODE_ENDLOOP: lp_exec_endloop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDREP: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_PUSHA: /* deprecated? */ assert(0); diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 080fd4c7310..5d9eed92580 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -661,25 +661,6 @@ TGSI Instruction Specification TBD -1.9.8 BGNFOR - Begin a For-Loop - - dst.x = floor(src.x) - dst.y = floor(src.y) - dst.z = floor(src.z) - - if (dst.y <= 0) - pc = [matching ENDFOR] + 1 - endif - - Note: The destination must be a loop register. - The source must be a constant register. - - -1.9.9 REP - Repeat - - TBD - - 1.9.10 ELSE - Else TBD @@ -690,23 +671,6 @@ TGSI Instruction Specification TBD -1.9.12 ENDFOR - End a For-Loop - - dst.x = dst.x + dst.z - dst.y = dst.y - 1.0 - - if (dst.y > 0) - pc = [matching BGNFOR instruction] + 1 - endif - - Note: The destination must be a loop register. - - -1.9.13 ENDREP - End Repeat - - TBD - - 1.10 GL_NV_vertex_program3 --------------------------- diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 57031419f8e..83000200189 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -586,7 +586,6 @@ iter_instruction( /* update indentation */ if (inst->Instruction.Opcode == TGSI_OPCODE_IF || inst->Instruction.Opcode == TGSI_OPCODE_ELSE || - inst->Instruction.Opcode == TGSI_OPCODE_BGNFOR || inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) { ctx->indentation += indent_spaces; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 11045e4ba2f..82eac05dc4d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3186,14 +3186,6 @@ exec_instruction( *pc = -1; break; - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - case TGSI_OPCODE_PUSHA: assert (0); break; @@ -3258,29 +3250,6 @@ exec_instruction( emit_primitive(mach); break; - case TGSI_OPCODE_BGNFOR: - assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - for (chan_index = 0; chan_index < 3; chan_index++) { - FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); - } - ++mach->LoopCounterStackTop; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - /* TODO: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); @@ -3295,56 +3264,6 @@ exec_instruction( mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; - case TGSI_OPCODE_ENDFOR: - assert(mach->LoopCounterStackTop > 0); - micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); - assert(mach->LoopLabelStackTop > 0); - inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); - /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - UPDATE_EXEC_MASK(mach); - if (mach->ExecMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - assert(mach->LoopLabelStackTop > 0); - *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; - } - else { - /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->LoopLabelStackTop > 0); - --mach->LoopLabelStackTop; - assert(mach->LoopCounterStackTop > 0); - --mach->LoopCounterStackTop; - - mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index de0e09cdbae..cfa2f631bd8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -106,12 +106,12 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL }, { 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, { 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, - { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR }, - { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP }, + { 1, 1, 0, 0, 0, 1, "", 75 }, /* removed */ + { 0, 1, 0, 0, 0, 1, "", 76 }, /* removed */ { 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE }, { 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, - { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP }, + { 1, 0, 0, 0, 1, 0, "", 79 }, /* removed */ + { 0, 0, 0, 0, 1, 0, "", 80 }, /* removed */ { 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, { 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, { 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index e4af15c156f..e472947507d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -111,12 +111,8 @@ OP12(DP2) OP12_TEX(TXL) OP00(BRK) OP01_LBL(IF) -OP11(BGNFOR) -OP01(REP) OP00_LBL(ELSE) OP00(ENDIF) -OP10(ENDFOR) -OP00(ENDREP) OP01(PUSHA) OP10(POPA) OP11(CEIL) diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 371f690b295..76b7564cc36 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -346,25 +346,6 @@ iter_instruction( } } - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_BGNFOR: - case TGSI_OPCODE_ENDFOR: - if (inst->Dst[0].Register.File != TGSI_FILE_LOOP || - inst->Dst[0].Register.Index != 0) { - report_error(ctx, "Destination register must be LOOP[0]"); - } - break; - } - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_BGNFOR: - if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT && - inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) { - report_error(ctx, "Source register file must be either CONST or IMM"); - } - break; - } - ctx->num_instructions++; return TRUE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index a85cc4659e0..1071298b497 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2533,14 +2533,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_BGNFOR: - return 0; - break; - - case TGSI_OPCODE_REP: - return 0; - break; - case TGSI_OPCODE_ELSE: return 0; break; @@ -2549,14 +2541,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_ENDFOR: - return 0; - break; - - case TGSI_OPCODE_ENDREP: - return 0; - break; - case TGSI_OPCODE_PUSHA: return 0; break; diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c new file mode 100644 index 00000000000..048bd5c34da --- /dev/null +++ b/src/gallium/auxiliary/util/u_caps.c @@ -0,0 +1,244 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_screen.h" +#include "util/u_format.h" +#include "util/u_debug.h" +#include "u_caps.h" + +/** + * Iterates over a list of caps checks as defined in u_caps.h. Should + * all checks pass returns TRUE and out is set to the last element of + * the list (TERMINATE). Should any check fail returns FALSE and set + * out to the index of the start of the first failing check. + */ +boolean +util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out) +{ + int i, tmpi; + float tmpf; + + for (i = 0; list[i];) { + switch(list[i++]) { + case UTIL_CAPS_CHECK_CAP: + if (!screen->get_param(screen, list[i++])) { + *out = i - 2; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_INT: + tmpi = screen->get_param(screen, list[i++]); + if (tmpi < (int)list[i++]) { + *out = i - 3; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_FLOAT: + tmpf = screen->get_paramf(screen, list[i++]); + if (tmpf < (float)list[i++]) { + *out = i - 3; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_FORMAT: + if (!screen->is_format_supported(screen, + list[i++], + PIPE_TEXTURE_2D, + PIPE_BIND_SAMPLER_VIEW, + 0)) { + *out = i - 2; + return FALSE; + } + case UTIL_CAPS_CHECK_UNIMPLEMENTED: + *out = i - 1; + return FALSE; + default: + assert(!"Unsupported check"); + return FALSE; + } + } + + *out = i; + return TRUE; +} + +/** + * Iterates over a list of caps checks as defined in u_caps.h. + * Returns TRUE if all caps checks pass returns FALSE otherwise. + */ +boolean +util_check_caps(struct pipe_screen *screen, const unsigned *list) +{ + int out; + return util_check_caps_out(screen, list, &out); +} + + +/* + * Below follows some demo lists. + * + * None of these lists are exhausting lists of what is + * actually needed to support said API and more here for + * as example on how to uses the above functions. Especially + * for DX10 and DX11 where Gallium is missing features. + */ + +/* DX 9_1 */ +static unsigned caps_dx_9_1[] = { + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 2), + UTIL_CHECK_TERMINATE +}; + +/* DX 9_2 */ +static unsigned caps_dx_9_2[] = { + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_TERMINATE +}; + +/* DX 9_3 */ +static unsigned caps_dx_9_3[] = { + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 4), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 13), /* 4096 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_TERMINATE +}; + +/* DX 10 */ +static unsigned caps_dx_10[] = { + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 8192 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 8192 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */ + UTIL_CHECK_TERMINATE +}; + +/* DX11 */ +static unsigned caps_dx_11[] = { + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 16384 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 16384 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_FORMAT(B8G8R8A8_UNORM), + UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */ + UTIL_CHECK_TERMINATE +}; + +/* OpenGL 2.1 */ +static unsigned caps_opengl_2_1[] = { + UTIL_CHECK_CAP(GLSL), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_CAP(TWO_SIDED_STENCIL), + UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 2), + UTIL_CHECK_TERMINATE +}; + +/* OpenGL 3.0 */ +/* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */ + + +/** + * Demo function which checks against theoretical caps needed for different APIs. + */ +void util_caps_demo_print(struct pipe_screen *screen) +{ + struct { + char* name; + unsigned *list; + } list[] = { + {"DX 9.1", caps_dx_9_1}, + {"DX 9.2", caps_dx_9_2}, + {"DX 9.3", caps_dx_9_3}, + {"DX 10", caps_dx_10}, + {"DX 11", caps_dx_11}, + {"OpenGL 2.1", caps_opengl_2_1}, +/* {"OpenGL 3.0", caps_opengl_3_0},*/ + {NULL, NULL} + }; + int i, out = 0; + + for (i = 0; list[i].name; i++) { + if (util_check_caps_out(screen, list[i].list, &out)) { + debug_printf("%s: %s yes\n", __FUNCTION__, list[i].name); + continue; + } + switch (list[i].list[out]) { + case UTIL_CAPS_CHECK_CAP: + debug_printf("%s: %s no (cap %u not supported)\n", __FUNCTION__, + list[i].name, + list[i].list[out + 1]); + break; + case UTIL_CAPS_CHECK_INT: + debug_printf("%s: %s no (cap %u less then %u)\n", __FUNCTION__, + list[i].name, + list[i].list[out + 1], + list[i].list[out + 2]); + break; + case UTIL_CAPS_CHECK_FLOAT: + debug_printf("%s: %s no (cap %u less then %f)\n", __FUNCTION__, + list[i].name, + list[i].list[out + 1], + (double)(int)list[i].list[out + 2]); + break; + case UTIL_CAPS_CHECK_FORMAT: + debug_printf("%s: %s no (format %s not supported)\n", __FUNCTION__, + list[i].name, + util_format_name(list[i].list[out + 1]) + 12); + break; + case UTIL_CAPS_CHECK_UNIMPLEMENTED: + debug_printf("%s: %s no (not implemented in gallium or state tracker)\n", + __FUNCTION__, list[i].name); + break; + default: + assert(!"Unsupported check"); + } + } +} diff --git a/src/gallium/auxiliary/util/u_caps.h b/src/gallium/auxiliary/util/u_caps.h new file mode 100644 index 00000000000..b1074f9eb21 --- /dev/null +++ b/src/gallium/auxiliary/util/u_caps.h @@ -0,0 +1,67 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_CAPS_H +#define U_CAPS_H + +#include "pipe/p_compiler.h" + +struct pipe_screen; + +enum u_caps_check_enum { + UTIL_CAPS_CHECK_TERMINATE = 0, + UTIL_CAPS_CHECK_CAP, + UTIL_CAPS_CHECK_INT, + UTIL_CAPS_CHECK_FLOAT, + UTIL_CAPS_CHECK_FORMAT, + UTIL_CAPS_CHECK_UNIMPLEMENTED, +}; + +#define UTIL_CHECK_CAP(cap) \ + UTIL_CAPS_CHECK_CAP, PIPE_CAP_##cap + +#define UTIL_CHECK_INT(cap, higher) \ + UTIL_CAPS_CHECK_INT, PIPE_CAP_##cap, (unsigned)(higher) + +/* Floats currently lose precision */ +#define UTIL_CHECK_FLOAT(cap, higher) \ + UTIL_CAPS_CHECK_FLOAT, PIPE_CAP_##cap, (unsigned)(int)(higher) + +#define UTIL_CHECK_FORMAT(format) \ + UTIL_CAPS_CHECK_FORMAT, PIPE_FORMAT_##format + +#define UTIL_CHECK_UNIMPLEMENTED \ + UTIL_CAPS_CHECK_UNIMPLEMENTED + +#define UTIL_CHECK_TERMINATE \ + UTIL_CAPS_CHECK_TERMINATE + +boolean util_check_caps(struct pipe_screen *screen, const unsigned *list); +boolean util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out); +void util_caps_demo_print(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index c134f13e908..2ce643e90cd 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -656,12 +656,12 @@ util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state) util_dump_struct_begin(stream, "pipe_transfer"); util_dump_member(stream, ptr, state, resource); -// util_dump_member(stream, uint, state, box); + /*util_dump_member(stream, uint, state, box);*/ util_dump_member(stream, uint, state, stride); util_dump_member(stream, uint, state, slice_stride); -// util_dump_member(stream, ptr, state, data); + /*util_dump_member(stream, ptr, state, data);*/ util_dump_struct_end(stream); } diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 605b13bd114..fb6ade5c06b 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -332,7 +332,7 @@ util_format_name(enum pipe_format format) assert(desc); if (!desc) { - return "???"; + return "PIPE_FORMAT_???"; } return desc->name; diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c index 66edb597fc8..5b279b8fe26 100644 --- a/src/gallium/auxiliary/util/u_format_s3tc.c +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -240,13 +240,14 @@ util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, util_format_dxtn_fetch_t fetch, unsigned block_size) { + const unsigned bw = 4, bh = 4, comps = 4; unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { + for(y = 0; y < height; y += bh) { const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + for(x = 0; x < width; x += bw) { + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps; fetch(0, src, i, j, dst); } } @@ -379,212 +380,197 @@ util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, void util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, - const uint8_t *src_row, unsigned src_stride, + const uint8_t *src, unsigned src_stride, unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, bytes_per_block = 8; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][3]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][3]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { for(k = 0; k < 3; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*4 + k]; } } } - util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); - src += 4*4; - dst += 8; + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, - const uint8_t *src_row, unsigned src_stride, + const uint8_t *src, unsigned src_stride, unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 8; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); - src += 4*4; - dst += 8; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, - const uint8_t *src_row, unsigned src_stride, + const uint8_t *src, unsigned src_stride, unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); - src += 4*4; - dst += 16; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, - const uint8_t *src_row, unsigned src_stride, + const uint8_t *src, unsigned src_stride, unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); - src += 4*4; - dst += 16; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, - const float *src_row, unsigned src_stride, + const float *src, unsigned src_stride, unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][3]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 3; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0); dst += 8; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, - const float *src_row, unsigned src_stride, + const float *src, unsigned src_stride, unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0); dst += 8; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void -util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0); dst += 16; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void -util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0); dst += 16; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index fe327c302b7..f7aa1403d08 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -544,7 +544,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_Z24_UNORM_S8_USCALED: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/ for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ @@ -571,7 +571,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_S8_USCALED_Z24_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/ for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ |