diff options
author | Roland Scheidegger <[email protected]> | 2010-04-30 15:27:13 +0200 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2010-04-30 15:27:13 +0200 |
commit | 7662e3519bef3802024da3050b886068281e02b1 (patch) | |
tree | bc5bf675e67febffaf9e4d1fadd139e8c253ae32 /src/gallium | |
parent | 9a966b93c03aecac8eb72e6133aa9a2f2c7673f3 (diff) | |
parent | 7c769bef052fc1936d7ab33e291bb4646dc5b0d1 (diff) |
Merge commit 'origin/master' into gallium-msaa
Diffstat (limited to 'src/gallium')
90 files changed, 1446 insertions, 990 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index f8e65cf6c61..2daed382cf3 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -101,6 +101,7 @@ C_SOURCES = \ util/u_blit.c \ util/u_blitter.c \ util/u_cache.c \ + util/u_caps.c \ util/u_cpu_detect.c \ util/u_dl.c \ util/u_draw_quad.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index db3a1e73114..a0673df8a8e 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -144,6 +144,7 @@ source = [ 'util/u_blit.c', 'util/u_blitter.c', 'util/u_cache.c', + 'util/u_caps.c', 'util/u_cpu_detect.c', 'util/u_debug.c', 'util/u_debug_memory.c', diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 030683f60f1..55e0ded1b30 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -314,10 +314,13 @@ void cso_release_all( struct cso_context *ctx ) } +/** + * Free the CSO context. NOTE: the state tracker should have previously called + * cso_release_all(). 
+ */ void cso_destroy_context( struct cso_context *ctx ) { if (ctx) { - /*cso_release_all( ctx );*/ FREE( ctx ); } } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 703b46b518b..3b2df054c3c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -24,6 +24,8 @@ /* generates the draw jit function */ static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); static void init_globals(struct draw_llvm *llvm) @@ -218,6 +220,7 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); draw_llvm_generate(llvm, variant); + draw_llvm_generate_elts(llvm, variant); return variant; } @@ -696,6 +699,158 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_disassemble(variant->jit_func); } + +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +{ + LLVMTypeRef arg_types[7]; + LLVMTypeRef func_type; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; + LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + struct draw_context *draw = llvm->draw; + unsigned i, j; + struct lp_build_context bld; + struct lp_build_context bld_int; + struct lp_build_loop_state lp_loop; + struct lp_type vs_type = lp_type_float_vec(32); + const int max_vertices = 4; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; + LLVMValueRef fetch_max; + + arg_types[0] = llvm->context_ptr_type; /* context */ + arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ + arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ + arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ + arg_types[4] = LLVMInt32Type(); /* fetch_count */ + arg_types[5] 
= LLVMInt32Type(); /* stride */ + arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); + LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(variant->function_elts, 0); + io_ptr = LLVMGetParam(variant->function_elts, 1); + vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); + fetch_elts = LLVMGetParam(variant->function_elts, 3); + fetch_count = LLVMGetParam(variant->function_elts, 4); + stride = LLVMGetParam(variant->function_elts, 5); + vb_ptr = LLVMGetParam(variant->function_elts, 6); + + lp_build_name(context_ptr, "context"); + lp_build_name(io_ptr, "io"); + lp_build_name(vbuffers_ptr, "vbuffers"); + lp_build_name(fetch_elts, "fetch_elts"); + lp_build_name(fetch_count, "fetch_count"); + lp_build_name(stride, "stride"); + lp_build_name(vb_ptr, "vb"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlock(variant->function_elts, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld_int, builder, lp_type_int(32)); + + step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + + fetch_max = LLVMBuildSub(builder, fetch_count, + LLVMConstInt(LLVMInt32Type(), 1, 0), + "fetch_max"); + + lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); + { + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; + LLVMValueRef io; + const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + + io_itr = lp_loop.counter; + io = LLVMBuildGEP(builder, io_ptr, 
&io_itr, 1, ""); +#if DEBUG_STORE + lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + io_itr, io, lp_loop.counter); +#endif + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef true_index = LLVMBuildAdd( + builder, + lp_loop.counter, + LLVMConstInt(LLVMInt32Type(), i, 0), ""); + LLVMValueRef fetch_ptr; + + /* make sure we're not out of bounds which can happen + * if fetch_count % 4 != 0, because on the last iteration + * a few of the 4 vertex fetches will be out of bounds */ + true_index = lp_build_min(&bld_int, true_index, fetch_max); + + fetch_ptr = LLVMBuildGEP(builder, fetch_elts, + &true_index, 1, ""); + true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; + LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), + velem->vertex_buffer_index, + 0); + LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, + &vb_index, 1, ""); + generate_fetch(builder, vbuffers_ptr, + &aos_attribs[j][i], velem, vb, true_index); + } + } + convert_to_soa(builder, aos_attribs, inputs, + draw->pt.nr_vertex_elements); + + ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; + generate_vs(llvm, + builder, + outputs, + ptr_aos, + context_ptr); + + convert_to_aos(builder, io, outputs, + draw->vs.vertex_shader->info.num_outputs, + max_vertices); + } + lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); + + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); + + /* + * Translate the LLVM IR into machine code. 
+ */ +#ifdef DEBUG + if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function_elts); + assert(0); + } +#endif + + LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); + + if (0) { + LLVMDumpValue(variant->function_elts); + debug_printf("\n"); + } + variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal( + llvm->draw->engine, variant->function_elts); + + if (0) + lp_disassemble(variant->jit_func_elts); +} + void draw_llvm_make_variant_key(struct draw_llvm *llvm, struct draw_llvm_variant_key *key) diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 20f4f3da5f3..58fee7f9d60 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -94,6 +94,16 @@ typedef void unsigned stride, struct pipe_vertex_buffer *vertex_buffers); + +typedef void +(*draw_jit_vert_func_elts)(struct draw_jit_context *context, + struct vertex_header *io, + const char *vbuffers[PIPE_MAX_ATTRIBS], + const unsigned *fetch_elts, + unsigned fetch_count, + unsigned stride, + struct pipe_vertex_buffer *vertex_buffers); + struct draw_llvm { struct draw_context *draw; @@ -122,7 +132,9 @@ struct draw_llvm_variant { struct draw_llvm_variant_key key; LLVMValueRef function; + LLVMValueRef function_elts; draw_jit_vert_func jit_func; + draw_jit_vert_func_elts jit_func_elts; struct draw_llvm_variant *next; }; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 1d9395e06c3..b853f3a89f8 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -38,10 +38,10 @@ #include "util/u_prim.h" -DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE); -DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE) #ifdef HAVE_LLVM 
-DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE); +DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE) #endif static unsigned trim( unsigned count, unsigned first, unsigned incr ) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index f71271bd915..d2a492f2b4c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -167,8 +167,6 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vshader = draw->vs.vertex_shader; - struct draw_geometry_shader *gshader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( fetch_count, 4 ); @@ -182,35 +180,13 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, return; } - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run( fpme->fetch, - fetch_elts, - fetch_count, - (char *)pipeline_verts ); - - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. 
If there is no shader, eg if - * bypass_vs_clip_and_viewport, then the inputs == outputs, and are - * already in the correct place.*/ - if (opt & PT_SHADE) - { - vshader->run_linear(vshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - if (gshader) - draw_geometry_shader_run(gshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - } + fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + fetch_elts, + fetch_count, + fpme->vertex_size, + draw->pt.vertex_buffer ); if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 0c059573505..c2832eefa2a 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -46,7 +46,7 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" -DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE) void draw_vs_set_constants(struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index efead42335f..6c7e94db433 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -169,8 +169,9 @@ draw_create_vs_ppc(struct draw_context *draw, struct draw_vs_varient_key; struct draw_vertex_shader; -struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_varient * +draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); @@ -188,8 +189,9 @@ struct translate *draw_vs_get_fetch( struct draw_context 
*draw, struct translate *draw_vs_get_emit( struct draw_context *draw, struct translate_key *key ); -struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_varient * +draw_vs_create_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index e7121f36541..19f49e34c8b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2089,13 +2089,21 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, } +/** cast wrapper */ +static INLINE struct draw_vs_varient_aos_sse * +draw_vs_varient_aos_sse(struct draw_vs_varient *varient) +{ + return (struct draw_vs_varient_aos_sse *) varient; +} + static void vaos_set_buffer( struct draw_vs_varient *varient, unsigned buf, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_stride) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); if (buf < vaos->nr_vb) { vaos->buffer[buf].base_ptr = (char *)ptr; @@ -2112,7 +2120,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2136,7 +2144,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2165,7 
+2173,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, static void vaos_destroy( struct draw_vs_varient *varient ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); FREE( vaos->buffer ); @@ -2241,13 +2249,14 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, } -struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_varient * +draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) { struct draw_vs_varient *varient = varient_aos_sse( vs, key ); if (varient == NULL) { - varient = draw_vs_varient_generic( vs, key ); + varient = draw_vs_create_varient_generic( vs, key ); } return varient; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 7deca2b69d9..bc34d390dae 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -203,7 +203,7 @@ draw_create_vs_exec(struct draw_context *draw, vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->machine = draw->vs.machine; return &vs->base; diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index d869eecec5e..5df84916c51 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, */ shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4]) shader->base.immediates, - (const float (*)[4])constants[0], + (float (*)[4])constants[0], ppc_builtin_constants); /* convert (up to) four output verts from SoA 
back to AoS format */ @@ -190,7 +190,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.create_varient = draw_vs_varient_aos_ppc; else #endif - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->base.prepare = vs_ppc_prepare; vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 54e6423388f..14c95082a9d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -165,9 +165,9 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.draw = draw; if (1) - vs->base.create_varient = draw_vs_varient_aos_sse; + vs->base.create_varient = draw_vs_create_varient_aos_sse; else - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 0abd827b615..6eb26927f27 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -263,8 +263,9 @@ static void vsvg_destroy( struct draw_vs_varient *varient ) } -struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_varient * +draw_vs_create_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) { unsigned i; struct translate_key fetch, emit; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 5f9d4c62d1c..20ae958714b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1210,6 +1210,14 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef 
a) { +#ifdef PIPE_OS_WINDOWS + /* + * FIXME: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf() + * which is neither efficient nor does the CRT linkage work on Windows + * causing segmentation fault. So simply disable the code for now. + */ + return bld->one; +#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1220,6 +1228,7 @@ lp_build_cos(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +#endif } @@ -1230,6 +1239,14 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { +#ifdef PIPE_OS_WINDOWS + /* + * FIXME: X86 backend translates llvm.sin.v4f32 to 4 calls to CRT's sinf() + * which is neither efficient nor does the CRT linkage work on Windows + * causing segmentation fault. So simply disable the code for now. + */ + return bld->zero; +#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1240,6 +1257,7 @@ lp_build_sin(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +#endif } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index e60ab4f6ba1..8f15b1d287d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -792,3 +792,78 @@ lp_build_endif(struct lp_build_if_state *ctx) /* Resume building code at end of the ifthen->merge_block */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); } + + +/** + * Allocate a scalar (or vector) variable. + * + * Although not strictly part of control flow, control flow has deep impact in + * how variables should be allocated. 
+ * + * The mem2reg optimization pass is the recommended way to dealing with mutable + * variables, and SSA. It looks for allocas and if it can handle them, it + * promotes them, but only looks for alloca instructions in the entry block of + * the function. Being in the entry block guarantees that the alloca is only + * executed once, which makes analysis simpler. + * + * See also: + * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory + */ +LLVMValueRef +lp_build_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + const char *name) +{ + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMValueRef res; + + LLVMPositionBuilderAtEnd(first_builder, first_block); + LLVMPositionBuilderBefore(first_builder, first_instr); + + res = LLVMBuildAlloca(first_builder, type, name); + + LLVMDisposeBuilder(first_builder); + + return res; +} + + +/** + * Allocate an array of scalars/vectors. + * + * mem2reg pass is not capable of promoting structs or arrays to registers, but + * we still put it in the first block anyway as failure to put allocas in the + * first block may prevent the X86 backend from successfully align the stack as + * required. + * + * Also the scalarrepl pass is supossedly more powerful and can promote + * arrays in many cases. 
+ * + * See also: + * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory + */ +LLVMValueRef +lp_build_array_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + LLVMValueRef count, + const char *name) +{ + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMValueRef res; + + LLVMPositionBuilderBefore(first_builder, first_instr); + + res = LLVMBuildArrayAlloca(first_builder, type, count, name); + + LLVMDisposeBuilder(first_builder); + + return res; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 745838570c8..fffb493a93b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -156,5 +156,15 @@ lp_build_endif(struct lp_build_if_state *ctx); LLVMBasicBlockRef lp_build_insert_new_block(LLVMBuilderRef builder, const char *name); +LLVMValueRef +lp_build_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + const char *name); + +LLVMValueRef +lp_build_array_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + LLVMValueRef count, + const char *name); #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index a5a019fa92a..6257e9a4047 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -40,6 +40,7 @@ #include "lp_bld_init.h" #include "lp_bld_type.h" +#include "lp_bld_flow.h" #include "lp_bld_format.h" @@ -370,11 +371,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); } - /* - * XXX: this should better go to the first block in the function - */ - 
- tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); + tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index a3b69701162..d13fa1a5d04 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -472,18 +472,6 @@ lp_build_select_aos(struct lp_build_context *bld, } } -LLVMValueRef -lp_build_alloca(struct lp_build_context *bld) -{ - const struct lp_type type = bld->type; - - if (type.length > 1) { /*vector*/ - return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), ""); - } else { /*scalar*/ - return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); - } -} - /** Return (a & ~b) */ LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 00a8c750196..29f9fc3b205 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -76,9 +76,6 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef b, const boolean cond[4]); -LLVMValueRef -lp_build_alloca(struct lp_build_context *bld); - LLVMValueRef lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 54ef921678d..74dc9e1d812 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1830,6 +1830,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef unswizzled[4]; LLVMValueRef stride; + assert(bld->static_state->target == PIPE_TEXTURE_2D); + assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR); + assert(bld->static_state->mag_img_filter == 
PIPE_TEX_FILTER_LINEAR); + assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE); + lp_build_context_init(&i32, builder, lp_type_int_vec(32)); lp_build_context_init(&h16, builder, lp_type_ufixed(16)); lp_build_context_init(&u8n, builder, lp_type_unorm(8)); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index fac90c60064..d3c769e28b8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -744,22 +744,11 @@ emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) { + LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); + unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx, i; - LLVMBasicBlockRef current_block = - LLVMGetInsertBlock(bld->base.builder); - LLVMBasicBlockRef first_block = - LLVMGetEntryBasicBlock( - LLVMGetBasicBlockParent(current_block)); - LLVMValueRef first_inst = - LLVMGetFirstInstruction(first_block); - - /* we want alloca's to be the first instruction - * in the function so we need to rewind the builder - * to the very beginning */ - LLVMPositionBuilderBefore(bld->base.builder, - first_inst); for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { @@ -767,23 +756,25 @@ emit_declaration( if (bld->has_indirect_addressing) { LLVMValueRef val = LLVMConstInt(LLVMInt32Type(), last*4 + 4, 0); - bld->temps_array = LLVMBuildArrayAlloca(bld->base.builder, - lp_build_vec_type(bld->base.type), - val, ""); + bld->temps_array = lp_build_array_alloca(bld->base.builder, + vec_type, val, ""); } else { for (i = 0; i < NUM_CHANNELS; i++) - bld->temps[idx][i] = lp_build_alloca(&bld->base); + bld->temps[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); } break; case TGSI_FILE_OUTPUT: for (i = 0; i < NUM_CHANNELS; i++) - bld->outputs[idx][i] = lp_build_alloca(&bld->base); + bld->outputs[idx][i] = 
lp_build_alloca(bld->base.builder, + vec_type, ""); break; case TGSI_FILE_ADDRESS: for (i = 0; i < NUM_CHANNELS; i++) - bld->addr[idx][i] = lp_build_alloca(&bld->base); + bld->addr[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); break; default: @@ -792,8 +783,6 @@ emit_declaration( } } - LLVMPositionBuilderAtEnd(bld->base.builder, - current_block); return TRUE; } @@ -1600,18 +1589,10 @@ emit_instruction( lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; - case TGSI_OPCODE_BGNFOR: - /* fall through */ case TGSI_OPCODE_BGNLOOP: lp_exec_bgnloop(&bld->exec_mask); break; - case TGSI_OPCODE_REP: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_ELSE: lp_exec_mask_cond_invert(&bld->exec_mask); break; @@ -1620,18 +1601,10 @@ emit_instruction( lp_exec_mask_cond_pop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDFOR: - /* fall-through */ case TGSI_OPCODE_ENDLOOP: lp_exec_endloop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDREP: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_PUSHA: /* deprecated? */ assert(0); diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 080fd4c7310..5d9eed92580 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -661,25 +661,6 @@ TGSI Instruction Specification TBD -1.9.8 BGNFOR - Begin a For-Loop - - dst.x = floor(src.x) - dst.y = floor(src.y) - dst.z = floor(src.z) - - if (dst.y <= 0) - pc = [matching ENDFOR] + 1 - endif - - Note: The destination must be a loop register. - The source must be a constant register. - - -1.9.9 REP - Repeat - - TBD - - 1.9.10 ELSE - Else TBD @@ -690,23 +671,6 @@ TGSI Instruction Specification TBD -1.9.12 ENDFOR - End a For-Loop - - dst.x = dst.x + dst.z - dst.y = dst.y - 1.0 - - if (dst.y > 0) - pc = [matching BGNFOR instruction] + 1 - endif - - Note: The destination must be a loop register. 
- - -1.9.13 ENDREP - End Repeat - - TBD - - 1.10 GL_NV_vertex_program3 --------------------------- diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 57031419f8e..83000200189 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -586,7 +586,6 @@ iter_instruction( /* update indentation */ if (inst->Instruction.Opcode == TGSI_OPCODE_IF || inst->Instruction.Opcode == TGSI_OPCODE_ELSE || - inst->Instruction.Opcode == TGSI_OPCODE_BGNFOR || inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) { ctx->indentation += indent_spaces; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 11045e4ba2f..82eac05dc4d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3186,14 +3186,6 @@ exec_instruction( *pc = -1; break; - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - case TGSI_OPCODE_PUSHA: assert (0); break; @@ -3258,29 +3250,6 @@ exec_instruction( emit_primitive(mach); break; - case TGSI_OPCODE_BGNFOR: - assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - for (chan_index = 0; chan_index < 3; chan_index++) { - FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); - } - ++mach->LoopCounterStackTop; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - /* TODO: if 
mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); @@ -3295,56 +3264,6 @@ exec_instruction( mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; - case TGSI_OPCODE_ENDFOR: - assert(mach->LoopCounterStackTop > 0); - micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); - assert(mach->LoopLabelStackTop > 0); - inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); - /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - UPDATE_EXEC_MASK(mach); - if (mach->ExecMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - assert(mach->LoopLabelStackTop > 0); - *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; - } - else { - /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop 
ContMask */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->LoopLabelStackTop > 0); - --mach->LoopLabelStackTop; - assert(mach->LoopCounterStackTop > 0); - --mach->LoopCounterStackTop; - - mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index de0e09cdbae..cfa2f631bd8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -106,12 +106,12 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL }, { 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, { 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, - { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR }, - { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP }, + { 1, 1, 0, 0, 0, 1, "", 75 }, /* removed */ + { 0, 1, 0, 0, 0, 1, "", 76 }, /* removed */ { 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE }, { 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, - { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP }, + { 1, 0, 0, 0, 1, 0, "", 79 }, /* removed */ + { 0, 0, 0, 0, 1, 0, "", 80 }, /* removed */ { 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, { 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, { 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index e4af15c156f..e472947507d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -111,12 +111,8 @@ OP12(DP2) OP12_TEX(TXL) OP00(BRK) OP01_LBL(IF) -OP11(BGNFOR) -OP01(REP) OP00_LBL(ELSE) OP00(ENDIF) -OP10(ENDFOR) -OP00(ENDREP) OP01(PUSHA) OP10(POPA) OP11(CEIL) diff --git 
a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 371f690b295..76b7564cc36 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -346,25 +346,6 @@ iter_instruction( } } - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_BGNFOR: - case TGSI_OPCODE_ENDFOR: - if (inst->Dst[0].Register.File != TGSI_FILE_LOOP || - inst->Dst[0].Register.Index != 0) { - report_error(ctx, "Destination register must be LOOP[0]"); - } - break; - } - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_BGNFOR: - if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT && - inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) { - report_error(ctx, "Source register file must be either CONST or IMM"); - } - break; - } - ctx->num_instructions++; return TRUE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index a85cc4659e0..1071298b497 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2533,14 +2533,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_BGNFOR: - return 0; - break; - - case TGSI_OPCODE_REP: - return 0; - break; - case TGSI_OPCODE_ELSE: return 0; break; @@ -2549,14 +2541,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_ENDFOR: - return 0; - break; - - case TGSI_OPCODE_ENDREP: - return 0; - break; - case TGSI_OPCODE_PUSHA: return 0; break; diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c new file mode 100644 index 00000000000..048bd5c34da --- /dev/null +++ b/src/gallium/auxiliary/util/u_caps.c @@ -0,0 +1,244 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_screen.h" +#include "util/u_format.h" +#include "util/u_debug.h" +#include "u_caps.h" + +/** + * Iterates over a list of caps checks as defined in u_caps.h. Should + * all checks pass returns TRUE and out is set to the last element of + * the list (TERMINATE). Should any check fail returns FALSE and set + * out to the index of the start of the first failing check. 
+ */
+boolean
+util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out)
+{
+   int i, tmpi;
+   float tmpf;
+
+   /*
+    * Each entry is an opcode word followed by its operand words. i always
+    * points at the next unread word, so on failure the start of the current
+    * entry is i minus the number of words consumed so far.
+    */
+   for (i = 0; list[i];) {
+      switch(list[i++]) {
+      case UTIL_CAPS_CHECK_CAP:
+         /* operands: cap */
+         if (!screen->get_param(screen, list[i++])) {
+            *out = i - 2;
+            return FALSE;
+         }
+         break;
+      case UTIL_CAPS_CHECK_INT:
+         /* operands: cap, minimum value */
+         tmpi = screen->get_param(screen, list[i++]);
+         if (tmpi < (int)list[i++]) {
+            *out = i - 3;
+            return FALSE;
+         }
+         break;
+      case UTIL_CAPS_CHECK_FLOAT:
+         /* operands: cap, minimum value (stored as an integer) */
+         tmpf = screen->get_paramf(screen, list[i++]);
+         if (tmpf < (float)list[i++]) {
+            *out = i - 3;
+            return FALSE;
+         }
+         break;
+      case UTIL_CAPS_CHECK_FORMAT:
+         /* operands: format */
+         if (!screen->is_format_supported(screen,
+                                          list[i++],
+                                          PIPE_TEXTURE_2D,
+                                          PIPE_BIND_SAMPLER_VIEW,
+                                          0)) {
+            *out = i - 2;
+            return FALSE;
+         }
+         /* A supported format must not fall into the UNIMPLEMENTED case. */
+         break;
+      case UTIL_CAPS_CHECK_UNIMPLEMENTED:
+         /* no operands; always fails */
+         *out = i - 1;
+         return FALSE;
+      default:
+         assert(!"Unsupported check");
+         /* Keep *out defined in non-debug builds: point at the bad opcode. */
+         *out = i - 1;
+         return FALSE;
+      }
+   }
+
+   *out = i;
+   return TRUE;
+}
+
+/**
+ * Iterates over a list of caps checks as defined in u_caps.h.
+ * Returns TRUE if all caps checks pass, FALSE otherwise.
+ */
+boolean
+util_check_caps(struct pipe_screen *screen, const unsigned *list)
+{
+   int out;
+   return util_check_caps_out(screen, list, &out);
+}
+
+
+/*
+ * Below follows some demo lists.
+ *
+ * None of these lists is an exhaustive list of what is
+ * actually needed to support said API; they are here
+ * as examples of how to use the above functions. Especially
+ * for DX10 and DX11 where Gallium is missing features.
+ */
+
+/* DX 9_1 */
+static unsigned caps_dx_9_1[] = {
+   UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1),
+   UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */
+   UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+   UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+   UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 2),
+   UTIL_CHECK_TERMINATE
+};
+
+/* DX 9_2 */
+static unsigned caps_dx_9_2[] = {
+   UTIL_CHECK_CAP(OCCLUSION_QUERY),
+   UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE),
+   UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1),
+   UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */
+   UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+   UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+   UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+   UTIL_CHECK_TERMINATE
+};
+
+/* DX 9_3 */
+static unsigned caps_dx_9_3[] = {
+   UTIL_CHECK_CAP(SM3),
+ //UTIL_CHECK_CAP(INSTANCING),
+   UTIL_CHECK_CAP(OCCLUSION_QUERY),
+   UTIL_CHECK_INT(MAX_RENDER_TARGETS, 4),
+   UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 13), /* 4096 */
+   UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */
+   UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */
+   UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+   UTIL_CHECK_TERMINATE
+};
+
+/* DX 10 */
+static unsigned caps_dx_10[] = {
+   UTIL_CHECK_CAP(SM3),
+ //UTIL_CHECK_CAP(INSTANCING),
+   UTIL_CHECK_CAP(OCCLUSION_QUERY),
+   UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8),
+   UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 8192 */
+   UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */
+   UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 8192 */
+   UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+   UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */
+   UTIL_CHECK_TERMINATE
+};
+
+/* DX11 */
+static unsigned caps_dx_11[] = {
+   UTIL_CHECK_CAP(SM3),
+ //UTIL_CHECK_CAP(INSTANCING),
+   UTIL_CHECK_CAP(OCCLUSION_QUERY),
+   UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8),
+   UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 16384 -- NOTE(review): caps_dx_10 labels 14 levels as 8192; value or comment needs confirming */
+   UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */
+   UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 16384 -- NOTE(review): same mismatch as the 2D entry above */
+   UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16),
+   UTIL_CHECK_FORMAT(B8G8R8A8_UNORM),
+   UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */
+   UTIL_CHECK_TERMINATE
+};
+
+/* OpenGL 2.1 */
+static unsigned caps_opengl_2_1[] = {
+   UTIL_CHECK_CAP(GLSL),
+   UTIL_CHECK_CAP(OCCLUSION_QUERY),
+   UTIL_CHECK_CAP(TWO_SIDED_STENCIL),
+   UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE),
+   UTIL_CHECK_INT(MAX_RENDER_TARGETS, 2),
+   UTIL_CHECK_TERMINATE
+};
+
+/* OpenGL 3.0 */
+/* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */
+
+
+/**
+ * Demo function which checks against theoretical caps needed for different APIs.
+ *
+ * Runs util_check_caps_out() on each demo list above and emits one
+ * debug_printf() line per list: "yes" when every check passes, otherwise
+ * "no" followed by the reason taken from the first failing check.
+ */
+void util_caps_demo_print(struct pipe_screen *screen)
+{
+   struct {
+      char* name;
+      unsigned *list;
+   } list[] = {
+      {"DX 9.1", caps_dx_9_1},
+      {"DX 9.2", caps_dx_9_2},
+      {"DX 9.3", caps_dx_9_3},
+      {"DX 10", caps_dx_10},
+      {"DX 11", caps_dx_11},
+      {"OpenGL 2.1", caps_opengl_2_1},
+/*    {"OpenGL 3.0", caps_opengl_3_0},*/
+      {NULL, NULL} /* sentinel: the loop below stops at a NULL name */
+   };
+   int i, out = 0;
+
+   for (i = 0; list[i].name; i++) {
+      if (util_check_caps_out(screen, list[i].list, &out)) {
+         debug_printf("%s: %s yes\n", __FUNCTION__, list[i].name);
+         continue;
+      }
+      switch (list[i].list[out]) { /* out indexes the opcode word of the failing check */
+      case UTIL_CAPS_CHECK_CAP:
+         debug_printf("%s: %s no (cap %u not supported)\n", __FUNCTION__,
+                      list[i].name,
+                      list[i].list[out + 1]);
+         break;
+      case UTIL_CAPS_CHECK_INT:
+         debug_printf("%s: %s no (cap %u less then %u)\n", __FUNCTION__,
+                      list[i].name,
+                      list[i].list[out + 1],
+                      list[i].list[out + 2]);
+         break;
+      case UTIL_CAPS_CHECK_FLOAT:
+         debug_printf("%s: %s no (cap %u less then %f)\n", __FUNCTION__,
+                      list[i].name,
+                      list[i].list[out + 1],
+                      (double)(int)list[i].list[out + 2]); /* threshold is stored as an integer in the list; widen it for %f */
+         break;
+      case UTIL_CAPS_CHECK_FORMAT:
+         debug_printf("%s: %s no (format %s not supported)\n", __FUNCTION__,
+                      list[i].name,
+                      util_format_name(list[i].list[out + 1]) + 12); /* presumably skips the 12-character "PIPE_FORMAT_" name prefix -- confirm */
+         break;
+      case UTIL_CAPS_CHECK_UNIMPLEMENTED:
+         debug_printf("%s: %s no (not implemented in gallium or state tracker)\n",
+                      __FUNCTION__, list[i].name);
+         break;
+      default:
+         assert(!"Unsupported check");
+      }
+   }
+}
diff --git a/src/gallium/auxiliary/util/u_caps.h b/src/gallium/auxiliary/util/u_caps.h new file mode 100644 index 00000000000..b1074f9eb21 --- /dev/null +++ b/src/gallium/auxiliary/util/u_caps.h @@ -0,0 +1,67 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_CAPS_H
+#define U_CAPS_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_screen;
+
+enum u_caps_check_enum {
+   UTIL_CAPS_CHECK_TERMINATE = 0, /* ends a list; the checker loops until it reads a zero opcode */
+   UTIL_CAPS_CHECK_CAP, /* one operand: cap; passes when get_param() is non-zero */
+   UTIL_CAPS_CHECK_INT, /* two operands: cap, minimum; passes when get_param() >= minimum */
+   UTIL_CAPS_CHECK_FLOAT, /* two operands: cap, minimum; passes when get_paramf() >= minimum */
+   UTIL_CAPS_CHECK_FORMAT, /* one operand: format; queried via is_format_supported() */
+   UTIL_CAPS_CHECK_UNIMPLEMENTED, /* no operands; always fails */
+};
+
+#define UTIL_CHECK_CAP(cap) \
+   UTIL_CAPS_CHECK_CAP, PIPE_CAP_##cap
+
+#define UTIL_CHECK_INT(cap, higher) \
+   UTIL_CAPS_CHECK_INT, PIPE_CAP_##cap, (unsigned)(higher)
+
+/* Floats currently lose precision: the threshold is truncated to int before being stored in the unsigned list */
+#define UTIL_CHECK_FLOAT(cap, higher) \
+   UTIL_CAPS_CHECK_FLOAT, PIPE_CAP_##cap, (unsigned)(int)(higher)
+
+#define UTIL_CHECK_FORMAT(format) \
+   UTIL_CAPS_CHECK_FORMAT, PIPE_FORMAT_##format
+
+#define UTIL_CHECK_UNIMPLEMENTED \
+   UTIL_CAPS_CHECK_UNIMPLEMENTED
+
+#define UTIL_CHECK_TERMINATE \
+   UTIL_CAPS_CHECK_TERMINATE
+
+boolean util_check_caps(struct pipe_screen *screen, const unsigned *list); /* TRUE when every check in list passes */
+boolean util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out); /* as above; *out gets the index of the failing entry, or of the terminator on success */
+void util_caps_demo_print(struct pipe_screen *screen); /* debug-prints a yes/no line for each built-in demo list */
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index c134f13e908..2ce643e90cd 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -656,12 +656,12 @@ util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state) util_dump_struct_begin(stream, "pipe_transfer"); util_dump_member(stream, ptr, state, resource); -// util_dump_member(stream, uint, state, box); + /*util_dump_member(stream, uint, state, box);*/ util_dump_member(stream, uint, state, stride); util_dump_member(stream, uint, state, slice_stride); -// util_dump_member(stream, ptr, state, data); + /*util_dump_member(stream, ptr, state, data);*/ util_dump_struct_end(stream); } diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index
605b13bd114..fb6ade5c06b 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -332,7 +332,7 @@ util_format_name(enum pipe_format format) assert(desc); if (!desc) { - return "???"; + return "PIPE_FORMAT_???"; } return desc->name; diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c index 66edb597fc8..5b279b8fe26 100644 --- a/src/gallium/auxiliary/util/u_format_s3tc.c +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -240,13 +240,14 @@ util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, util_format_dxtn_fetch_t fetch, unsigned block_size) { + const unsigned bw = 4, bh = 4, comps = 4; unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { + for(y = 0; y < height; y += bh) { const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + for(x = 0; x < width; x += bw) { + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps; fetch(0, src, i, j, dst); } } @@ -379,212 +380,197 @@ util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, void util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, - const uint8_t *src_row, unsigned src_stride, + const uint8_t *src, unsigned src_stride, unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, bytes_per_block = 8; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][3]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][3]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { for(k = 0; k < 3; ++k) { - tmp[j][i][k] 
= src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*4 + k]; } } } - util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); - src += 4*4; - dst += 8; + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, - const uint8_t *src_row, unsigned src_stride, + const uint8_t *src, unsigned src_stride, unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 8; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); - src += 4*4; - dst += 8; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, - const uint8_t *src_row, unsigned src_stride, + const uint8_t *src, unsigned src_stride, unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16; unsigned x, y, i, j, k; - for(y = 0; y < 
height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); - src += 4*4; - dst += 16; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, - const uint8_t *src_row, unsigned src_stride, + const uint8_t *src, unsigned src_stride, unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); - src += 4*4; - dst += 16; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0); + dst += 
bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, - const float *src_row, unsigned src_stride, + const float *src, unsigned src_stride, unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][3]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 3; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0); dst += 8; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, - const float *src_row, unsigned src_stride, + const float *src, unsigned src_stride, unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0); dst += 8; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void -util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, 
unsigned src_stride, unsigned width, unsigned height) +util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0); dst += 16; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void -util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0); dst += 16; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 
fe327c302b7..f7aa1403d08 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -544,7 +544,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_Z24_UNORM_S8_USCALED: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/ for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ @@ -571,7 +571,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_S8_USCALED_Z24_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/ for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index c292cd37d5c..e2c8602da02 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -786,33 +786,6 @@ This instruction replicates its result. TBD -.. opcode:: BGNFOR - Begin a For-Loop - - dst.x = floor(src.x) - dst.y = floor(src.y) - dst.z = floor(src.z) - - if (dst.y <= 0) - pc = [matching ENDFOR] + 1 - endif - - Note: The destination must be a loop register. - The source must be a constant register. - -.. note:: - - Considered for cleanup. - -.. note:: - - Considered for removal. - - -.. opcode:: REP - Repeat - - TBD - - .. opcode:: ELSE - Else TBD @@ -823,30 +796,6 @@ This instruction replicates its result. TBD -.. opcode:: ENDFOR - End a For-Loop - - dst.x = dst.x + dst.z - dst.y = dst.y - 1.0 - - if (dst.y > 0) - pc = [matching BGNFOR instruction] + 1 - endif - - Note: The destination must be a loop register. - -.. note:: - - Considered for cleanup. - -.. note:: - - Considered for removal. - -.. 
opcode:: ENDREP - End Repeat - - TBD - - .. opcode:: PUSHA - Push Address Register On Stack push(src.x) diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d2166a49016..d7788bd9bbb 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1622,14 +1622,6 @@ exec_instruction( *pc = -1; break; - case TGSI_OPCODE_REP: - ASSERT (0); - break; - - case TGSI_OPCODE_ENDREP: - ASSERT (0); - break; - case TGSI_OPCODE_PUSHA: ASSERT (0); break; @@ -1743,8 +1735,6 @@ exec_instruction( mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; break; - case TGSI_OPCODE_BGNFOR: - /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); @@ -1753,8 +1743,6 @@ exec_instruction( mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; - case TGSI_OPCODE_ENDFOR: - /* fall-through (for now at least) */ case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ ASSERT(mach->ContStackTop > 0); diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index f7cf06d8d46..32b80d3a9f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -97,63 +97,24 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; llvmpipe->pipe.priv = priv; - llvmpipe->pipe.destroy = llvmpipe_destroy; - - /* state setters */ - llvmpipe->pipe.create_blend_state = llvmpipe_create_blend_state; - llvmpipe->pipe.bind_blend_state = llvmpipe_bind_blend_state; - llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; - - llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; - llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; - llvmpipe->pipe.bind_vertex_sampler_states = 
llvmpipe_bind_vertex_sampler_states; - llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; - - llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; - llvmpipe->pipe.bind_depth_stencil_alpha_state = llvmpipe_bind_depth_stencil_state; - llvmpipe->pipe.delete_depth_stencil_alpha_state = llvmpipe_delete_depth_stencil_state; - - llvmpipe->pipe.create_rasterizer_state = llvmpipe_create_rasterizer_state; - llvmpipe->pipe.bind_rasterizer_state = llvmpipe_bind_rasterizer_state; - llvmpipe->pipe.delete_rasterizer_state = llvmpipe_delete_rasterizer_state; - - llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state; - llvmpipe->pipe.bind_fs_state = llvmpipe_bind_fs_state; - llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state; - - llvmpipe->pipe.create_vs_state = llvmpipe_create_vs_state; - llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state; - llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state; - llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state; - llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state; - llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; - - llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color; - llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref; - llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state; - llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer; + /* Init the pipe context methods */ + llvmpipe->pipe.destroy = llvmpipe_destroy; llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; - llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; - llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views; - llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views; - llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; - 
llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; - llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; - - llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; - - llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; - llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; - llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; - llvmpipe->pipe.clear = llvmpipe_clear; llvmpipe->pipe.flush = llvmpipe_flush; - + llvmpipe_init_blend_funcs(llvmpipe); + llvmpipe_init_clip_funcs(llvmpipe); + llvmpipe_init_draw_funcs(llvmpipe); + llvmpipe_init_sampler_funcs(llvmpipe); llvmpipe_init_query_funcs( llvmpipe ); + llvmpipe_init_vertex_funcs(llvmpipe); + llvmpipe_init_fs_funcs(llvmpipe); + llvmpipe_init_vs_funcs(llvmpipe); + llvmpipe_init_rasterizer_funcs(llvmpipe); llvmpipe_init_context_resource_funcs( &llvmpipe->pipe ); + llvmpipe_init_surface_functions(llvmpipe); /* * Create drawing context and plug our rendering stage into it. @@ -186,8 +147,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe); #endif - lp_init_surface_functions(llvmpipe); - lp_reset_counters(); return &llvmpipe->pipe; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 4848101ffb8..4e597b24796 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -94,9 +94,6 @@ struct llvmpipe_context { /** Vertex format */ struct vertex_info vertex_info; - /** Which vertex shader output slot contains point size */ - int psize_slot; - /** The tiling engine */ struct lp_setup_context *setup; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 0b63e1c889e..98780d7631b 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -42,20 +42,12 @@ -void -llvmpipe_draw_arrays(struct 
pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count); -} - - /** * Draw vertex arrays, with optional indexing. * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -void +static void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, @@ -115,7 +107,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, } -void +static void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, @@ -128,3 +120,19 @@ llvmpipe_draw_elements(struct pipe_context *pipe, mode, start, count ); } + +static void +llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count); +} + + +void +llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; + llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; + llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; +} diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h index c7c5a1eca87..4102a9df67c 100644 --- a/src/gallium/drivers/llvmpipe/lp_limits.h +++ b/src/gallium/drivers/llvmpipe/lp_limits.h @@ -61,4 +61,10 @@ #define LP_MAX_THREADS 8 +/** + * Max bytes per scene. This may be replaced by a runtime parameter. + */ +#define LP_MAX_SCENE_SIZE (512 * 1024 * 1024) + + #endif /* LP_LIMITS_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 400404fc524..a00a592f2fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -882,9 +882,9 @@ create_rast_threads(struct lp_rasterizer *rast) /** - * Create new lp_rasterizer. 
- * \param empty the queue to put empty scenes on after we've finished - * processing them. + * Create new lp_rasterizer. If num_threads is zero, don't create any + * new threads, do rendering synchronously. + * \param num_threads number of rasterizer threads to create */ struct lp_rasterizer * lp_rast_create( unsigned num_threads ) @@ -944,6 +944,8 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) /* for synchronizing rasterization threads */ pipe_barrier_destroy( &rast->barrier ); + lp_scene_queue_destroy(rast->full_scenes); + FREE(rast); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 18457ff4ce2..5884d12721e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -105,7 +105,7 @@ struct lp_rasterizer * (potentially) shared, these empty scenes should be returned to * the context which created them rather than retained here. */ - struct lp_scene_queue *empty_scenes; + /* struct lp_scene_queue *empty_scenes; */ /** The scene currently being rasterized by the threads */ struct lp_scene *curr_scene; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 71d9529230b..1482a777ff8 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -32,7 +32,6 @@ #include "util/u_surface.h" #include "lp_scene.h" #include "lp_scene_queue.h" -#include "lp_debug.h" /** List of texture references */ @@ -43,6 +42,10 @@ struct texture_ref { +/** + * Create a new scene object. 
+ * \param queue the queue to put newly rendered/emptied scenes into + */ struct lp_scene * lp_scene_create( struct pipe_context *pipe, struct lp_scene_queue *queue ) @@ -195,6 +198,8 @@ lp_scene_reset(struct lp_scene *scene ) make_empty_list(ref_list); } + scene->scene_size = 0; + scene->has_color_clear = FALSE; scene->has_depth_clear = FALSE; } @@ -226,7 +231,10 @@ lp_bin_new_data_block( struct data_block_list *list ) } -/** Return number of bytes used for all bin data within a scene */ +/** + * Return number of bytes used for all bin data within a scene. + * This does not include resources (textures) referenced by the scene. + */ unsigned lp_scene_data_size( const struct lp_scene *scene ) { @@ -267,6 +275,8 @@ lp_scene_add_resource_reference(struct lp_scene *scene, pipe_resource_reference(&ref->resource, resource); insert_at_tail(ref_list, ref); } + + scene->scene_size += llvmpipe_resource_size(resource); } @@ -401,61 +411,6 @@ end: } - -/** - * Prepare this scene for the rasterizer. - * Map the framebuffer surfaces. Initialize the 'rast' state. - */ -static boolean -lp_scene_map_buffers( struct lp_scene *scene ) -{ - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - - /* XXX framebuffer surfaces are no longer mapped here */ - /* XXX move all map/unmap stuff into rast module... */ - - return TRUE; -} - - - -/** - * Called after rasterizer as finished rasterizing a scene. - * - * We want to call this from the pipe_context's current thread to - * avoid having to have mutexes on the transfer functions. 
- */ -static void -lp_scene_unmap_buffers( struct lp_scene *scene ) -{ -#if 0 - unsigned i; - - for (i = 0; i < scene->fb.nr_cbufs; i++) { - if (scene->cbuf_map[i]) { - struct pipe_surface *cbuf = scene->fb.cbufs[i]; - llvmpipe_resource_unmap(cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice); - scene->cbuf_map[i] = NULL; - } - } - - if (scene->zsbuf_map) { - struct pipe_surface *zsbuf = scene->fb.zsbuf; - llvmpipe_resource_unmap(zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice); - scene->zsbuf_map = NULL; - } -#endif - - util_unreference_framebuffer_state( &scene->fb ); -} - - void lp_scene_begin_binning( struct lp_scene *scene, struct pipe_framebuffer_state *fb ) { @@ -472,8 +427,7 @@ void lp_scene_begin_binning( struct lp_scene *scene, void lp_scene_rasterize( struct lp_scene *scene, - struct lp_rasterizer *rast, - boolean write_depth ) + struct lp_rasterizer *rast ) { if (0) { unsigned x, y; @@ -487,11 +441,6 @@ void lp_scene_rasterize( struct lp_scene *scene, } } - scene->write_depth = (scene->fb.zsbuf != NULL && - write_depth); - - lp_scene_map_buffers( scene ); - /* Enqueue the scene for rasterization, then immediately wait for * it to finish. */ @@ -502,6 +451,9 @@ void lp_scene_rasterize( struct lp_scene *scene, * transfers become per-context: */ lp_rast_finish( rast ); - lp_scene_unmap_buffers( scene ); + + util_unreference_framebuffer_state( &scene->fb ); + + /* put scene into the empty list */ lp_scene_enqueue( scene->empty_queue, scene ); } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 3e2bd0e7b38..9467cd6f16d 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -119,7 +119,11 @@ struct lp_scene { /** list of resources referenced by the scene commands */ struct resource_ref resources; - boolean write_depth; + /** Approx memory used by the scene (in bytes). 
This includes the + * shared and per-tile bins plus any referenced resources/textures. + */ + unsigned scene_size; + boolean has_color_clear; boolean has_depth_clear; @@ -182,6 +186,8 @@ lp_scene_alloc( struct lp_scene *scene, unsigned size) lp_bin_new_data_block( list ); } + scene->scene_size += size; + { struct data_block *tail = list->tail; ubyte *data = tail->data + tail->used; @@ -204,6 +210,8 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, lp_bin_new_data_block( list ); } + scene->scene_size += size; + { struct data_block *tail = list->tail; ubyte *data = tail->data + tail->used; @@ -220,6 +228,7 @@ static INLINE void lp_scene_putback_data( struct lp_scene *scene, unsigned size) { struct data_block_list *list = &scene->data; + scene->scene_size -= size; assert(list->tail->used >= size); list->tail->used -= size; } @@ -302,11 +311,18 @@ lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); void lp_scene_rasterize( struct lp_scene *scene, - struct lp_rasterizer *rast, - boolean write_depth ); + struct lp_rasterizer *rast ); void lp_scene_begin_binning( struct lp_scene *scene, struct pipe_framebuffer_state *fb ); + +static INLINE unsigned +lp_scene_get_size(const struct lp_scene *scene) +{ + return scene->scene_size; +} + + #endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 00a897c86f8..111eedc4f23 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -97,6 +97,8 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index abc659c3691..21509560084 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -74,6 +74,26 
@@ lp_setup_get_current_scene(struct lp_setup_context *setup) } +/** + * Check if the size of the current scene has exceeded the limit. + * If so, flush/render it. + */ +static void +setup_check_scene_size_and_flush(struct lp_setup_context *setup) +{ + if (setup->scene) { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + unsigned size = lp_scene_get_size(scene); + + if (size > LP_MAX_SCENE_SIZE) { + /*printf("LLVMPIPE: scene size = %u, flushing.\n", size);*/ + set_scene_state( setup, SETUP_FLUSHED ); + /*assert(lp_scene_get_size(scene) == 0);*/ + } + } +} + + static void first_triangle( struct lp_setup_context *setup, const float (*v0)[4], @@ -132,14 +152,11 @@ static void reset_context( struct lp_setup_context *setup ) /** Rasterize all scene's bins */ static void -lp_setup_rasterize_scene( struct lp_setup_context *setup, - boolean write_depth ) +lp_setup_rasterize_scene( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - lp_scene_rasterize(scene, - setup->rast, - write_depth); + lp_scene_rasterize(scene, setup->rast); reset_context( setup ); @@ -190,7 +207,7 @@ execute_clears( struct lp_setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); - lp_setup_rasterize_scene( setup, TRUE ); + lp_setup_rasterize_scene( setup ); } @@ -221,7 +238,7 @@ set_scene_state( struct lp_setup_context *setup, if (old_state == SETUP_CLEARED) execute_clears( setup ); else - lp_setup_rasterize_scene( setup, TRUE ); + lp_setup_rasterize_scene( setup ); break; default: @@ -596,10 +613,14 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup, void lp_setup_update_state( struct lp_setup_context *setup ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_scene *scene; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + setup_check_scene_size_and_flush(setup); + + scene = lp_setup_get_current_scene(setup); + assert(setup->fs.current.jit_function); /* Some of the 
'draw' pipeline stages may have changed some driver state. @@ -741,6 +762,8 @@ lp_setup_destroy( struct lp_setup_context *setup ) reset_context( setup ); + util_unreference_framebuffer_state(&setup->fb); + for (i = 0; i < Elements(setup->fs.current_tex); i++) { pipe_resource_reference(&setup->fs.current_tex[i], NULL); } @@ -755,6 +778,8 @@ lp_setup_destroy( struct lp_setup_context *setup ) lp_scene_destroy(scene); } + lp_scene_queue_destroy(setup->empty_scenes); + lp_rast_destroy( setup->rast ); FREE( setup ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index a4012754784..5d3122e8ba2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -440,7 +440,12 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void lp_setup_vbuf_destroy(struct vbuf_render *vbr) { - lp_setup_destroy(lp_setup_context(vbr)); + struct lp_setup_context *setup = lp_setup_context(vbr); + if (setup->vertex_buffer) { + align_free(setup->vertex_buffer); + setup->vertex_buffer = NULL; + } + lp_setup_destroy(setup); } diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index dcbff190b62..18143807c91 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -31,11 +31,10 @@ #ifndef LP_STATE_H #define LP_STATE_H -#include "gallivm/lp_bld.h" - #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "gallivm/lp_bld.h" #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ @@ -85,8 +84,6 @@ struct lp_fragment_shader_variant_key struct lp_fragment_shader_variant { - struct lp_fragment_shader *shader; - struct lp_fragment_shader_variant_key key; LLVMValueRef function[2]; @@ -97,11 +94,7 @@ struct lp_fragment_shader_variant }; -/** - * Subclass of pipe_shader_state (though it doesn't really need to be). 
- * - * This is starting to look an awful lot like a quad pipeline stage... - */ +/** Subclass of pipe_shader_state */ struct lp_fragment_shader { struct pipe_shader_state base; @@ -109,140 +102,58 @@ struct lp_fragment_shader struct tgsi_shader_info info; struct lp_fragment_shader_variant *variants; - - struct lp_fragment_shader_variant *current; }; /** Subclass of pipe_shader_state */ -struct lp_vertex_shader { +struct lp_vertex_shader +{ struct pipe_shader_state shader; struct draw_vertex_shader *draw_data; }; -struct lp_velems_state { + +/** Vertex element state */ +struct lp_velems_state +{ unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; }; -void * -llvmpipe_create_blend_state(struct pipe_context *, - const struct pipe_blend_state *); -void llvmpipe_bind_blend_state(struct pipe_context *, - void *); -void llvmpipe_delete_blend_state(struct pipe_context *, - void *); - -void * -llvmpipe_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); -void llvmpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); void -llvmpipe_bind_vertex_sampler_states(struct pipe_context *, - unsigned num_samplers, - void **samplers); -void llvmpipe_delete_sampler_state(struct pipe_context *, void *); - -void * -llvmpipe_create_depth_stencil_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); -void llvmpipe_bind_depth_stencil_state(struct pipe_context *, void *); -void llvmpipe_delete_depth_stencil_state(struct pipe_context *, void *); - -void * -llvmpipe_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void llvmpipe_bind_rasterizer_state(struct pipe_context *, void *); -void llvmpipe_delete_rasterizer_state(struct pipe_context *, void *); - -void llvmpipe_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - -void llvmpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - -void 
llvmpipe_set_stencil_ref( struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref ); - -void llvmpipe_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void llvmpipe_set_constant_buffer(struct pipe_context *, - uint shader, uint index, - struct pipe_resource *buf); - -void *llvmpipe_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void llvmpipe_bind_fs_state(struct pipe_context *, void *); -void llvmpipe_delete_fs_state(struct pipe_context *, void *); -void *llvmpipe_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void llvmpipe_bind_vs_state(struct pipe_context *, void *); -void llvmpipe_delete_vs_state(struct pipe_context *, void *); - -void *llvmpipe_create_vertex_elements_state(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); -void llvmpipe_bind_vertex_elements_state(struct pipe_context *, void *); -void llvmpipe_delete_vertex_elements_state(struct pipe_context *, void *); - -void llvmpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void llvmpipe_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void llvmpipe_set_fragment_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); +llvmpipe_set_framebuffer_state(struct pipe_context *, + const struct pipe_framebuffer_state *); void -llvmpipe_set_vertex_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); - -struct pipe_sampler_view * -llvmpipe_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ); +llvmpipe_update_fs(struct llvmpipe_context *lp); void -llvmpipe_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view); +llvmpipe_update_derived(struct llvmpipe_context *llvmpipe); -void llvmpipe_set_viewport_state( struct pipe_context *, - const struct 
pipe_viewport_state * ); - -void llvmpipe_set_vertex_buffers(struct pipe_context *, - unsigned count, - const struct pipe_vertex_buffer *); +void +llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_update_fs(struct llvmpipe_context *lp); +void +llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ); +void +llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe); +void +llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); +void +llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe); -void llvmpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count); void -llvmpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count); +llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe); void -llvmpipe_map_texture_surfaces(struct llvmpipe_context *lp); +llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe); void -llvmpipe_unmap_texture_surfaces(struct llvmpipe_context *lp); +llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 4ee28473e80..8569507f4e5 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -40,15 +40,16 @@ #include "lp_state.h" -void * +static void * llvmpipe_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { return mem_dup(blend, sizeof(*blend)); } -void llvmpipe_bind_blend_state( struct pipe_context *pipe, - void *blend ) + +static void 
+llvmpipe_bind_blend_state(struct pipe_context *pipe, void *blend) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -62,15 +63,17 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, llvmpipe->dirty |= LP_NEW_BLEND; } -void llvmpipe_delete_blend_state(struct pipe_context *pipe, - void *blend) + +static void +llvmpipe_delete_blend_state(struct pipe_context *pipe, void *blend) { FREE( blend ); } -void llvmpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) +static void +llvmpipe_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -93,14 +96,15 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, */ -void * +static void * llvmpipe_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *depth_stencil) { return mem_dup(depth_stencil, sizeof(*depth_stencil)); } -void + +static void llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) { @@ -116,14 +120,17 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; } -void + +static void llvmpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) { FREE( depth ); } -void llvmpipe_set_stencil_ref( struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref ) + +static void +llvmpipe_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *stencil_ref) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -142,3 +149,18 @@ void llvmpipe_set_stencil_ref( struct pipe_context *pipe, } +void +llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_blend_state = llvmpipe_create_blend_state; + llvmpipe->pipe.bind_blend_state = llvmpipe_bind_blend_state; + llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; + + 
llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; + llvmpipe->pipe.bind_depth_stencil_alpha_state = llvmpipe_bind_depth_stencil_state; + llvmpipe->pipe.delete_depth_stencil_alpha_state = llvmpipe_delete_depth_stencil_state; + + llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color; + + llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_clip.c b/src/gallium/drivers/llvmpipe/lp_state_clip.c index df68f27acc9..32ae079cc15 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_clip.c +++ b/src/gallium/drivers/llvmpipe/lp_state_clip.c @@ -32,8 +32,9 @@ #include "draw/draw_context.h" -void llvmpipe_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) +static void +llvmpipe_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -42,8 +43,9 @@ void llvmpipe_set_clip_state( struct pipe_context *pipe, } -void llvmpipe_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) +static void +llvmpipe_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *viewport) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -55,8 +57,9 @@ void llvmpipe_set_viewport_state( struct pipe_context *pipe, } -void llvmpipe_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) +static void +llvmpipe_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *scissor) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -67,8 +70,9 @@ void llvmpipe_set_scissor_state( struct pipe_context *pipe, } -void llvmpipe_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) +static void +llvmpipe_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) { struct llvmpipe_context 
*llvmpipe = llvmpipe_context(pipe); @@ -77,3 +81,14 @@ void llvmpipe_set_polygon_stipple( struct pipe_context *pipe, llvmpipe->poly_stipple = *stipple; /* struct copy */ llvmpipe->dirty |= LP_NEW_STIPPLE; } + + + +void +llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state; + llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; + llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; + llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 9e9d9bcfb52..965777b6411 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -621,7 +621,6 @@ generate_fragment(struct llvmpipe_context *lp, LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef blend_int_vec_type; LLVMTypeRef arg_types[15]; LLVMTypeRef func_type; LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); @@ -680,7 +679,6 @@ generate_fragment(struct llvmpipe_context *lp, fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); - blend_int_vec_type = lp_build_int_vec_type(blend_type); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ @@ -939,7 +937,6 @@ generate_variant(struct llvmpipe_context *lp, if(!variant) return NULL; - variant->shader = shader; memcpy(&variant->key, key, sizeof *key); generate_fragment(lp, shader, variant, 0); @@ -953,7 +950,7 @@ generate_variant(struct llvmpipe_context *lp, } -void * +static void * llvmpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -969,11 +966,16 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); + if (LP_DEBUG & DEBUG_TGSI) { + 
debug_printf("llvmpipe: Create fragment shader %p:\n", (void *) shader); + tgsi_dump(templ->tokens, 0); + } + return shader; } -void +static void llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -989,7 +991,7 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) } -void +static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -1032,7 +1034,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) -void +static void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_resource *constants) @@ -1163,8 +1165,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */ } - shader->current = variant; - /* TODO: put this in the variant */ /* TODO: most of these can be relaxed, in particular the colormask */ opaque = !key.blend.logicop_enable && @@ -1178,7 +1178,19 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) ? 
TRUE : FALSE; lp_setup_set_fs_functions(lp->setup, - shader->current->jit_function[RAST_WHOLE], - shader->current->jit_function[RAST_EDGE_TEST], + variant->jit_function[RAST_WHOLE], + variant->jit_function[RAST_EDGE_TEST], opaque); } + + + +void +llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state; + llvmpipe->pipe.bind_fs_state = llvmpipe_bind_fs_state; + llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state; + + llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 3fba5e4e75d..622eb47ff45 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -34,7 +34,7 @@ -void * +static void * llvmpipe_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *rast) { @@ -46,7 +46,7 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, -void +static void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -79,10 +79,19 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) } -void llvmpipe_delete_rasterizer_state(struct pipe_context *pipe, - void *rasterizer) +static void +llvmpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) { FREE( rasterizer ); } + +void +llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_rasterizer_state = llvmpipe_create_rasterizer_state; + llvmpipe->pipe.bind_rasterizer_state = llvmpipe_bind_rasterizer_state; + llvmpipe->pipe.delete_rasterizer_state = llvmpipe_delete_rasterizer_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 3552ff50ce1..55d43368a3e 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -41,7 +41,7 @@ -void * +static void * llvmpipe_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { @@ -49,7 +49,7 @@ llvmpipe_create_sampler_state(struct pipe_context *pipe, } -void +static void llvmpipe_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { @@ -76,7 +76,7 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, } -void +static void llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, unsigned num_samplers, void **samplers) @@ -104,7 +104,7 @@ llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, } -void +static void llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -133,7 +133,7 @@ llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe, } -void +static void llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -163,7 +163,7 @@ llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, } -struct pipe_sampler_view * +static struct pipe_sampler_view * llvmpipe_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ) @@ -182,7 +182,7 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe, } -void +static void llvmpipe_sampler_view_destroy(struct pipe_context *pipe, struct pipe_sampler_view *view) { @@ -191,7 +191,7 @@ llvmpipe_sampler_view_destroy(struct pipe_context *pipe, } -void +static void llvmpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { @@ -199,4 +199,16 @@ llvmpipe_delete_sampler_state(struct pipe_context *pipe, } - +void +llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; + + llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; + 
llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; + llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views; + llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views; + llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; + llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; + llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index f6427aa908e..113f13db018 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -35,7 +35,7 @@ #include "draw/draw_context.h" -void * +static void * llvmpipe_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *attribs) @@ -50,7 +50,7 @@ llvmpipe_create_vertex_elements_state(struct pipe_context *pipe, return velems; } -void +static void llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) { @@ -65,13 +65,13 @@ llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe, draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem); } -void +static void llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) { FREE( velems ); } -void +static void llvmpipe_set_vertex_buffers(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_buffer *buffers) @@ -87,3 +87,15 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(llvmpipe->draw, count, buffers); } + + + +void +llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state; + llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state; + llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; + + 
llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 884e3878e62..f2d88089906 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -28,15 +28,17 @@ #include "pipe/p_defines.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "util/u_memory.h" #include "draw/draw_context.h" #include "lp_context.h" +#include "lp_debug.h" #include "lp_state.h" -void * +static void * llvmpipe_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -57,6 +59,11 @@ llvmpipe_create_vs_state(struct pipe_context *pipe, if (state->draw_data == NULL) goto fail; + if (LP_DEBUG & DEBUG_TGSI) { + debug_printf("llvmpipe: Create vertex shader %p:\n", (void *) state); + tgsi_dump(templ->tokens, 0); + } + return state; fail: @@ -69,7 +76,7 @@ fail: } -void +static void llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -87,7 +94,7 @@ llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) } -void +static void llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); @@ -99,3 +106,13 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) FREE( (void *)state->shader.tokens ); FREE( state ); } + + + +void +llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_vs_state = llvmpipe_create_vs_state; + llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state; + llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state; +} diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 1432782cefa..8bd83f576f4 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -146,7 +146,7 @@ lp_surface_copy(struct pipe_context 
*pipe, void -lp_init_surface_functions(struct llvmpipe_context *lp) +llvmpipe_init_surface_functions(struct llvmpipe_context *lp) { lp->pipe.surface_copy = lp_surface_copy; lp->pipe.surface_fill = util_surface_fill; diff --git a/src/gallium/drivers/llvmpipe/lp_surface.h b/src/gallium/drivers/llvmpipe/lp_surface.h index 4d78a53c4f5..b1b896ebd90 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.h +++ b/src/gallium/drivers/llvmpipe/lp_surface.h @@ -36,7 +36,7 @@ struct llvmpipe_context; extern void -lp_init_surface_functions(struct llvmpipe_context *lp); +llvmpipe_init_surface_functions(struct llvmpipe_context *lp); #endif /* LP_SURFACE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 29bdfe36ae6..2f41d620c8a 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -241,6 +241,13 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, /* display target */ struct sw_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, lpr->dt); + + if (lpr->tiled[0].data) { + align_free(lpr->tiled[0].data); + lpr->tiled[0].data = NULL; + } + + FREE(lpr->layout[0]); } else if (resource_is_texture(pt)) { /* regular texture */ @@ -1160,6 +1167,27 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, } +/** + * Return size of resource in bytes + */ +unsigned +llvmpipe_resource_size(const struct pipe_resource *resource) +{ + const struct llvmpipe_resource *lpr = llvmpipe_resource_const(resource); + unsigned lvl, size = 0; + + for (lvl = 0; lvl <= lpr->base.last_level; lvl++) { + if (lpr->linear[lvl].data) + size += tex_image_size(lpr, lvl, LP_TEX_LAYOUT_LINEAR); + + if (lpr->tiled[lvl].data) + size += tex_image_size(lpr, lvl, LP_TEX_LAYOUT_TILED); + } + + return size; +} + + void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 
7d0ae263e51..a8d08d6247f 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -184,6 +184,10 @@ void * llvmpipe_resource_data(struct pipe_resource *resource); +unsigned +llvmpipe_resource_size(const struct pipe_resource *resource); + + ubyte * llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr, unsigned face_slice, unsigned level, diff --git a/src/gallium/drivers/nvfx/SConscript b/src/gallium/drivers/nvfx/SConscript index 7c452acb8b9..02d931b10e8 100644 --- a/src/gallium/drivers/nvfx/SConscript +++ b/src/gallium/drivers/nvfx/SConscript @@ -2,7 +2,7 @@ Import('*') env = env.Clone() -env.Append(CPPPATH = [ +env.PrependUnique(delete_existing=1, CPPPATH = [ '#/src/gallium/drivers', ]) diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 5a8e00f15a2..d3cd6bef96e 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ r300_emit.c \ r300_flush.c \ r300_fs.c \ + r300_hyperz.c \ r300_query.c \ r300_render.c \ r300_resource.c \ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 08aec427a15..3921085d76a 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -21,6 +21,7 @@ r300 = env.ConvenienceLibrary( 'r300_emit.c', 'r300_flush.c', 'r300_fs.c', + 'r300_hyperz.c', 'r300_query.c', 'r300_render.c', 'r300_resource.c', diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 43a42490a04..4f721ebb599 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -37,6 +37,8 @@ #include "r300_state_invariant.h" #include "r300_winsys.h" +#include <inttypes.h> + static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); @@ -49,9 +51,9 @@ static void r300_destroy_context(struct pipe_context* context) /* 
Print stats, if enabled. */ if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { fprintf(stderr, "r300: Stats for context %p:\n", r300); - fprintf(stderr, " : Flushes: %llu\n", r300->flush_counter); + fprintf(stderr, " : Flushes: %" PRIu64 "\n", r300->flush_counter); foreach(atom, &r300->atom_list) { - fprintf(stderr, " : %s: %llu emits\n", + fprintf(stderr, " : %s: %" PRIu64 " emits\n", atom->name, atom->counter); } } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b6e20591eda..a05bf3ce099 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -257,6 +257,10 @@ struct r300_texture { /* A pitch for each mip-level */ unsigned pitch[R300_MAX_TEXTURE_LEVELS]; + /* A pitch multiplied by blockwidth as hardware wants + * the number of pixels instead of the number of blocks. */ + unsigned hwpitch[R300_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face based on the texture target */ unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index aed0ffeeccb..4c2836f36a8 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -38,6 +38,7 @@ static struct debug_option debug_options[] = { { "draw", DBG_DRAW, "Draw and emit (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, { "fall", DBG_FALL, "Fallbacks (for debugging)" }, + { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 2816c35fd30..92b7517b8d6 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -83,7 +83,6 @@ void 
r300_emit_clip_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_clip_state* clip = (struct pipe_clip_state*)state; - int i; CS_LOCALS(r300); if (r300->screen->caps.has_tcl) { @@ -92,9 +91,7 @@ void r300_emit_clip_state(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_UCP_START : R300_PVS_UCP_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); - for (i = 0; i < 6; i++) { - OUT_CS_TABLE(clip->ucp[i], 4); - } + OUT_CS_TABLE(clip->ucp, 6 * 4); OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) | R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CS; @@ -103,7 +100,6 @@ void r300_emit_clip_state(struct r300_context* r300, OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); END_CS; } - } void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) @@ -416,12 +412,9 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4); for(i = 0; i < count; ++i) { - const float *data; assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL); - data = buf->constants[i]; - - OUT_CS_TABLE(data, 4); } + OUT_CS_TABLE(buf->constants, count * 4); END_CS; } @@ -999,7 +992,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_vs_constants(struct r300_context* r300, unsigned size, void *state) { - unsigned i; unsigned count = ((struct r300_vertex_shader*)r300->vs_state.state)->externals_count; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; @@ -1013,10 +1005,7 @@ void r300_emit_vs_constants(struct r300_context* r300, (r300->screen->caps.is_r500 ? 
R500_PVS_CONST_START : R300_PVS_CONST_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); - for (i = 0; i < count; i++) { - const float *data = buf->constants[i]; - OUT_CS_TABLE(data, 4); - } + OUT_CS_TABLE(buf->constants, count * 4); END_CS; } @@ -1175,6 +1164,11 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) } } + /* emit_query_end is not atomized. */ + dwords += 26; + /* let's reserve some more, just in case */ + dwords += 32; + return dwords; } diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c new file mode 100644 index 00000000000..b41b6b1508d --- /dev/null +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -0,0 +1,108 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2009 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + + +#include "r300_hyperz.h" +#include "r300_context.h" +#include "r300_reg.h" +#include "r300_fs.h" + +/*****************************************************************************/ +/* The ZTOP state */ +/*****************************************************************************/ + +static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when a new depth or stencil value + * can be written and changed. */ + + /* We might optionally check for [Z func: never] and inspect the stencil + * state in a similar fashion, but it's not terribly important. */ + return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || + (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || + ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && + (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); +} + +static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when alpha testing can kill + * a fragment. */ + uint32_t af = dsa->alpha_function; + + return (af & R300_FG_ALPHA_FUNC_ENABLE) && + (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; +} + +static void r300_update_ztop(struct r300_context* r300) +{ + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; + + /* This is important enough that I felt it warranted a comment. + * + * According to the docs, these are the conditions where ZTOP must be + * disabled: + * 1) Alpha testing enabled + * 2) Texture kill instructions in fragment shader + * 3) Chroma key culling enabled + * 4) W-buffering enabled + * + * The docs claim that for the first three cases, if no ZS writes happen, + * then ZTOP can be used. + * + * (3) will never apply since we do not support chroma-keyed operations. + * (4) will need to be re-examined (and this comment updated) if/when + * Hyper-Z becomes supported. 
+ * + * Additionally, the following conditions require disabled ZTOP: + * 5) Depth writes in fragment shader + * 6) Outstanding occlusion queries + * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * + * ~C. + */ + + /* ZS writes */ + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ + r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else { + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; + } + + r300->ztop_state.dirty = TRUE; +} + +void r300_update_hyperz_state(struct r300_context* r300) +{ + r300_update_ztop(r300); +} diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h new file mode 100644 index 00000000000..3df5053b896 --- /dev/null +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -0,0 +1,30 @@ +/* + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_HYPERZ_H +#define R300_HYPERZ_H + +struct r300_context; + +void r300_update_hyperz_state(struct r300_context* r300); + +#endif diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b95a24a5bf0..7c3a7902a49 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -235,7 +235,7 @@ void r500_emit_draw_arrays_immediate(struct r300_context *r300, dwords = 9 + count * vertex_size; - r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2 + dwords); r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); @@ -600,8 +600,9 @@ void r300_draw_range_elements(struct pipe_context* pipe, start += short_count; count -= short_count; - /* 16 spare dwords are enough for emit_draw_elements. */ - if (count && r300_reserve_cs_space(r300, 16)) { + /* 16 spare dwords are enough for emit_draw_elements. + * Also reserve some space for emit_query_end. */ + if (count && r300_reserve_cs_space(r300, 74)) { r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); r300_emit_aos(r300, 0, TRUE); @@ -668,8 +669,9 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, count -= short_count; /* Again, we emit both AOS and draw_arrays so there should be - * at least 128 spare dwords. */ - if (count && r300_reserve_cs_space(r300, 128)) { + * at least 128 spare dwords. 
+ * Also reserve some space for emit_query_end. */ + if (count && r300_reserve_cs_space(r300, 186)) { r300_emit_buffer_validate(r300, TRUE, NULL); r300_emit_dirty_state(r300); } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8fc1d5aa00e..c0391267031 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "util/u_format.h" +#include "util/u_format_s3tc.h" #include "util/u_memory.h" #include "r300_context.h" @@ -319,6 +320,8 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) r300_init_screen_resource_functions(r300screen); + util_format_s3tc_init(); + return &r300screen->screen; } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 70ea2fe8d92..735c233c9e2 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -72,6 +72,7 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { #define DBG_NO_TILING 0x0000100 #define DBG_NO_IMMD 0x0000200 #define DBG_STATS 0x0000400 +#define DBG_RS 0x0000800 /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index e8171e94903..9319dadfd1e 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -853,8 +853,11 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + /* XXX Disable point sprites until we know what's wrong with them. 
*/ + rs->rs.sprite_coord_enable = 0; + /* Point sprites */ - if (state->sprite_coord_enable) { + if (rs->rs.sprite_coord_enable) { rs->stuffing_enable = R300_GB_POINT_STUFF_ENABLE; for (i = 0; i < 8; i++) { if (state->sprite_coord_enable & (1 << i)) @@ -1072,11 +1075,9 @@ r300_create_sampler_view(struct pipe_context *pipe, swizzle[2] = templ->swizzle_b; swizzle[3] = templ->swizzle_a; - /* XXX Enable swizzles when they become supported. Now we get RGBA - * everywhere. And do testing! */ view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, - 0); /*swizzle);*/ + swizzle); if (r300_screen(pipe->screen)->caps.is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } @@ -1280,26 +1281,57 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); if (r300_screen(pipe->screen)->caps.has_tcl) { - /* Check if the format is aligned to the size of DWORD. */ + r300_vertex_psc(velems); + + /* Check if the format is aligned to the size of DWORD. + * We only care about the blocksizes of the formats since + * swizzles are already set up. */ for (i = 0; i < count; i++) { format = &velems->velem[i].src_format; /* Replace some formats with their aligned counterparts, * this is OK because we check for aligned strides too. */ - /* XXX We need X instead of A in the format names. */ switch (*format) { + /* Align to RGBA8. 
*/ + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8B8_UNORM: - *format = PIPE_FORMAT_R8G8B8X8_UNORM; + *format = PIPE_FORMAT_R8G8B8A8_UNORM; continue; + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: case PIPE_FORMAT_R8G8B8_SNORM: *format = PIPE_FORMAT_R8G8B8A8_SNORM; continue; + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: case PIPE_FORMAT_R8G8B8_USCALED: *format = PIPE_FORMAT_R8G8B8A8_USCALED; continue; + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: case PIPE_FORMAT_R8G8B8_SSCALED: *format = PIPE_FORMAT_R8G8B8A8_SSCALED; continue; + + /* Align to RG16. */ + case PIPE_FORMAT_R16_UNORM: + *format = PIPE_FORMAT_R16G16_UNORM; + continue; + case PIPE_FORMAT_R16_SNORM: + *format = PIPE_FORMAT_R16G16_SNORM; + continue; + case PIPE_FORMAT_R16_USCALED: + *format = PIPE_FORMAT_R16G16_USCALED; + continue; + case PIPE_FORMAT_R16_SSCALED: + *format = PIPE_FORMAT_R16G16_SSCALED; + continue; + case PIPE_FORMAT_R16_FLOAT: + *format = PIPE_FORMAT_R16G16_FLOAT; + continue; + + /* Align to RGBA16. 
*/ case PIPE_FORMAT_R16G16B16_UNORM: *format = PIPE_FORMAT_R16G16B16A16_UNORM; continue; @@ -1312,6 +1344,10 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, case PIPE_FORMAT_R16G16B16_SSCALED: *format = PIPE_FORMAT_R16G16B16A16_SSCALED; continue; + case PIPE_FORMAT_R16G16B16_FLOAT: + *format = PIPE_FORMAT_R16G16B16A16_FLOAT; + continue; + default:; } @@ -1327,7 +1363,6 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, } } - r300_vertex_psc(velems); } } return velems; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 46c192eae14..ccc4b583a6a 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -28,6 +28,7 @@ #include "r300_context.h" #include "r300_fs.h" +#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" #include "r300_state.h" @@ -42,6 +43,7 @@ enum r300_rs_swizzle { SWIZ_XYZW = 0, SWIZ_X001, SWIZ_XY01, + SWIZ_0001, }; static void r300_draw_emit_attrib(struct r300_context* r300, @@ -169,10 +171,10 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) } static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, - boolean swizzle_0001) + enum r300_rs_swizzle swiz) { rs->ip[id] |= R300_RS_COL_PTR(ptr); - if (swizzle_0001) { + if (swiz == SWIZ_0001) { rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); } else { rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); @@ -218,10 +220,10 @@ static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) } static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, - boolean swizzle_0001) + enum r300_rs_swizzle swiz) { rs->ip[id] |= R500_RS_COL_PTR(ptr); - if (swizzle_0001) { + if (swiz == SWIZ_0001) { rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); } else { rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); @@ -276,7 +278,7 @@ static void r300_update_rs_block(struct 
r300_context* r300, { struct r300_rs_block rs = { { 0 } }; int i, col_count = 0, tex_count = 0, fp_offset = 0, count; - void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); + void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); @@ -301,12 +303,17 @@ static void r300_update_rs_block(struct r300_context* r300, vs_outputs->color[1] != ATTR_UNUSED) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ - rX00_rs_col(&rs, col_count, i, FALSE); + rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); /* Write it to the FS input register if it's used by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { rX00_rs_col_write(&rs, col_count, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, + "r300: Rasterized color %i written to FS.\n", i); + } else { + DBG(r300, DBG_RS, "r300: Rasterized color %i unused.\n", i); } col_count++; } else { @@ -314,6 +321,9 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->color[i] != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input color %i unassigned%s.\n", + i); } } } @@ -331,9 +341,15 @@ static void r300_update_rs_block(struct r300_context* r300, /* Write it to the FS input register if it's used by the FS. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); - if (sprite_coord) - debug_printf("r300: SpriteCoord (generic index %i) is being written to reg %i\n", i, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, + "r300: Rasterized generic %i written to FS%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); + } else { + DBG(r300, DBG_RS, + "r300: Rasterized generic %i unused%s.\n", + i, sprite_coord ? 
" (sprite coord)" : ""); } tex_count++; } else { @@ -341,6 +357,9 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n", + i, sprite_coord ? " (sprite coord)" : ""); } } } @@ -355,6 +374,10 @@ static void r300_update_rs_block(struct r300_context* r300, if (fs_inputs->fog != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; + + DBG(r300, DBG_RS, "r300: Rasterized fog written to FS.\n"); + } else { + DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n"); } tex_count++; } else { @@ -362,6 +385,8 @@ static void r300_update_rs_block(struct r300_context* r300, /* If we try to set it to (0,0,0,1), it will lock up. */ if (fs_inputs->fog != ATTR_UNUSED) { fp_offset++; + + DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n"); } } @@ -371,16 +396,23 @@ static void r300_update_rs_block(struct r300_context* r300, rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); rX00_rs_tex_write(&rs, tex_count, fp_offset); + DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n"); + fp_offset++; tex_count++; } /* Rasterize at least one color, or bad things happen. 
*/ if (col_count == 0 && tex_count == 0) { - rX00_rs_col(&rs, 0, 0, TRUE); + rX00_rs_col(&rs, 0, 0, SWIZ_0001); col_count++; + + DBG(r300, DBG_RS, "r300: Rasterized color 0 to prevent lockups.\n"); } + DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, " + "generics: %i.\n", col_count, tex_count); + rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; @@ -402,77 +434,6 @@ static void r300_update_derived_shader_state(struct r300_context* r300) r300_update_rs_block(r300, &vs->outputs, &r300_fs(r300)->shader->inputs); } -static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) -{ - /* We are interested only in the cases when a new depth or stencil value - * can be written and changed. */ - - /* We might optionally check for [Z func: never] and inspect the stencil - * state in a similar fashion, but it's not terribly important. */ - return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || - (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || - ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && - (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); -} - -static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) -{ - /* We are interested only in the cases when alpha testing can kill - * a fragment. */ - uint32_t af = dsa->alpha_function; - - return (af & R300_FG_ALPHA_FUNC_ENABLE) && - (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; -} - -static void r300_update_ztop(struct r300_context* r300) -{ - struct r300_ztop_state* ztop_state = - (struct r300_ztop_state*)r300->ztop_state.state; - - /* This is important enough that I felt it warranted a comment. 
- * - * According to the docs, these are the conditions where ZTOP must be - * disabled: - * 1) Alpha testing enabled - * 2) Texture kill instructions in fragment shader - * 3) Chroma key culling enabled - * 4) W-buffering enabled - * - * The docs claim that for the first three cases, if no ZS writes happen, - * then ZTOP can be used. - * - * (3) will never apply since we do not support chroma-keyed operations. - * (4) will need to be re-examined (and this comment updated) if/when - * Hyper-Z becomes supported. - * - * Additionally, the following conditions require disabled ZTOP: - * 5) Depth writes in fragment shader - * 6) Outstanding occlusion queries - * - * This register causes stalls all the way from SC to CB when changed, - * but it is buffered on-chip so it does not hurt to write it if it has - * not changed. - * - * ~C. - */ - - /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ - r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - ztop_state->z_buffer_top = R300_ZTOP_DISABLE; - } else { - ztop_state->z_buffer_top = R300_ZTOP_ENABLE; - } - - r300->ztop_state.dirty = TRUE; -} - static void r300_merge_textures_and_samplers(struct r300_context* r300) { struct r300_textures_state *state = @@ -578,5 +539,5 @@ void r300_update_derived_state(struct r300_context* r300) r300_swtcl_vertex_psc(r300); } - r300_update_ztop(r300); + r300_update_hyperz_state(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index c2bff67ccb6..fcbdb91b67e 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -453,10 +453,17 @@ 
r300_translate_vertex_data_swizzle(enum pipe_format format) { return 0; } - for (i = 0; i < 4; i++) { + for (i = 0; i < desc->nr_channels; i++) { swizzle |= MIN2(desc->swizzle[i], R300_SWIZZLE_SELECT_FP_ONE) << (3*i); } + /* Set (0,0,0,1) in unused components. */ + for (; i < 3; i++) { + swizzle |= R300_SWIZZLE_SELECT_FP_ZERO << (3*i); + } + for (; i < 4; i++) { + swizzle |= R300_SWIZZLE_SELECT_FP_ONE << (3*i); + } return swizzle | (0xf << R300_WRITE_ENA_SHIFT); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index cfa61b0b3f6..69e6a124458 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -34,9 +34,6 @@ #include "r300_screen.h" #include "r300_winsys.h" -/* XXX Enable float textures here. */ -/*#define ENABLE_FLOAT_TEXTURES*/ - #define TILE_WIDTH 0 #define TILE_HEIGHT 1 @@ -74,7 +71,7 @@ static boolean r300_format_is_plain(enum pipe_format format) * The FORMAT specifies how the texture sampler will treat the texture, and * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle) + const unsigned char *swizzle_view) { uint32_t result = 0; const struct util_format_description *desc; @@ -98,6 +95,7 @@ uint32_t r300_translate_texformat(enum pipe_format format, R300_TX_FORMAT_SIGNED_Z, R300_TX_FORMAT_SIGNED_W, }; + unsigned char swizzle[4]; desc = util_format_description(format); @@ -144,25 +142,18 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - /* Add swizzle. */ - if (!swizzle) { - swizzle = desc->swizzle; - } /*else { - if (swizzle[0] != desc->swizzle[0] || - swizzle[1] != desc->swizzle[1] || - swizzle[2] != desc->swizzle[2] || - swizzle[3] != desc->swizzle[3]) - { - const char n[6] = "RGBA01"; - fprintf(stderr, "Got different swizzling! 
Format: %c%c%c%c, " - "View: %c%c%c%c\n", - n[desc->swizzle[0]], n[desc->swizzle[1]], - n[desc->swizzle[2]], n[desc->swizzle[3]], - n[swizzle[0]], n[swizzle[1]], n[swizzle[2]], - n[swizzle[3]]); + /* Get swizzle. */ + if (swizzle_view) { + /* Compose two sets of swizzles. */ + for (i = 0; i < 4; i++) { + swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? + desc->swizzle[swizzle_view[i]] : swizzle_view[i]; } - }*/ + } else { + memcpy(swizzle, desc->swizzle, sizeof(swizzle)); + } + /* Add swizzle. */ for (i = 0; i < 4; i++) { switch (swizzle[i]) { case UTIL_FORMAT_SWIZZLE_X: @@ -316,7 +307,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, } return ~0; -#if defined(ENABLE_FLOAT_TEXTURES) case UTIL_FORMAT_TYPE_FLOAT: switch (desc->channel[0].size) { case 16: @@ -340,7 +330,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_32F_32F_32F_32F | result; } } -#endif } return ~0; /* Unsupported/unknown. */ @@ -405,16 +394,12 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: -#if defined(ENABLE_FLOAT_TEXTURES) case PIPE_FORMAT_R16G16B16A16_FLOAT: -#endif return R300_COLOR_FORMAT_ARGB16161616; /* 128-bit buffers. */ -#if defined(ENABLE_FLOAT_TEXTURES) case PIPE_FORMAT_R32G32B32A32_FLOAT: return R300_COLOR_FORMAT_ARGB32323232; -#endif /* YUV buffers. 
*/ case PIPE_FORMAT_UYVY: @@ -532,7 +517,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - //case PIPE_FORMAT_R16G16B16A16_FLOAT: /* not in pipe_format */ + case PIPE_FORMAT_R16G16B16A16_FLOAT: case PIPE_FORMAT_R32G32B32A32_FLOAT: return modifier | R300_C0_SEL_R | R300_C1_SEL_G | @@ -573,7 +558,7 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, if (tex->uses_pitch) { /* rectangles love this */ f->format0 |= R300_TX_PITCH_EN; - f->format2 = (tex->pitch[0] - 1) & 0x1fff; + f->format2 = (tex->hwpitch[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); @@ -614,7 +599,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, if (util_format_is_depth_or_stencil(tex->b.b.format)) { for (i = 0; i <= tex->b.b.last_level; i++) { tex->fb_state.depthpitch[i] = - tex->pitch[i] | + tex->hwpitch[i] | R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | R300_DEPTHMICROTILE(tex->microtile); } @@ -622,7 +607,7 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, } else { for (i = 0; i <= tex->b.b.last_level; i++) { tex->fb_state.colorpitch[i] = - tex->pitch[i] | + tex->hwpitch[i] | r300_translate_colorformat(tex->b.b.format) | R300_COLOR_TILE(tex->mip_macrotile[i]) | R300_COLOR_MICROTILE(tex->microtile); @@ -767,7 +752,7 @@ static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, unsigned i, size; - if (screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && + if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && tex->b.b.target == PIPE_TEXTURE_3D && tex->b.b.last_level > 0) { size = 0; @@ -813,6 +798,8 @@ static void r300_setup_miptree(struct r300_screen* screen, tex->size = tex->offset[i] + size; tex->layer_size[i] = layer_size; tex->pitch[i] = stride / 
util_format_get_blocksize(base->format); + tex->hwpitch[i] = + tex->pitch[i] * util_format_get_blockwidth(base->format); SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 453d42b188f..ba79ec068a1 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -28,7 +28,7 @@ struct r300_texture; uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle); + const unsigned char *swizzle_view); uint32_t r500_tx_format_msb_bit(enum pipe_format format); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index f6428ed760f..89f39af9761 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -108,11 +108,9 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */ case TGSI_OPCODE_IF: return RC_OPCODE_IF; /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */ - /* case TGSI_OPCODE_REP: return RC_OPCODE_REP; */ case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */ - /* case TGSI_OPCODE_ENDREP: return RC_OPCODE_ENDREP; */ /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */ case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL; diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 60c4d18e3a2..1642981eaa8 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -121,7 +121,7 @@ struct r300_winsys_screen { /* Write a table of dwords to the command buffer. 
*/ void (*write_cs_table)(struct r300_winsys_screen* winsys, - void *dwords, unsigned count); + const void *dwords, unsigned count); /* Write a relocated dword to the command buffer. */ void (*write_cs_reloc)(struct r300_winsys_screen *winsys, diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 11aa0c4583b..8bb0294238a 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -71,6 +71,8 @@ softpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 3533c4fd4f0..7aa85559b23 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -215,6 +215,35 @@ softpipe_resource_get_handle(struct pipe_screen *screen, /** + * Helper function to compute offset (in bytes) for a particular + * texture level/face/slice from the start of the buffer. + */ +static unsigned +sp_get_tex_image_offset(const struct softpipe_resource *spr, + unsigned level, unsigned face, unsigned zslice) +{ + const unsigned hgt = u_minify(spr->base.height0, level); + const unsigned nblocksy = util_format_get_nblocksy(spr->base.format, hgt); + unsigned offset = spr->level_offset[level]; + + if (spr->base.target == PIPE_TEXTURE_CUBE) { + assert(zslice == 0); + offset += face * nblocksy * spr->stride[level]; + } + else if (spr->base.target == PIPE_TEXTURE_3D) { + assert(face == 0); + offset += zslice * nblocksy * spr->stride[level]; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + return offset; +} + + +/** * Get a pipe_surface "view" into a texture resource. 
*/ static struct pipe_surface * @@ -235,25 +264,12 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->format = pt->format; ps->width = u_minify(pt->width0, level); ps->height = u_minify(pt->height0, level); - ps->offset = spr->level_offset[level]; + ps->offset = sp_get_tex_image_offset(spr, level, face, zslice); ps->usage = usage; ps->face = face; ps->level = level; ps->zslice = zslice; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) * - spr->stride[level]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) * - spr->stride[level]; - } - else { - assert(face == 0); - assert(zslice == 0); - } } return ps; } @@ -291,8 +307,8 @@ softpipe_get_transfer(struct pipe_context *pipe, unsigned usage, const struct pipe_box *box) { - struct softpipe_resource *sprex = softpipe_resource(resource); - struct softpipe_transfer *spr; + struct softpipe_resource *spr = softpipe_resource(resource); + struct softpipe_transfer *spt; assert(resource); assert(sr.level <= resource->last_level); @@ -323,33 +339,20 @@ softpipe_get_transfer(struct pipe_context *pipe, } } - spr = CALLOC_STRUCT(softpipe_transfer); - if (spr) { - struct pipe_transfer *pt = &spr->base; + spt = CALLOC_STRUCT(softpipe_transfer); + if (spt) { + struct pipe_transfer *pt = &spt->base; enum pipe_format format = resource->format; - int nblocksy = util_format_get_nblocksy(resource->format, - u_minify(resource->height0, sr.level)); pipe_resource_reference(&pt->resource, resource); pt->sr = sr; pt->usage = usage; pt->box = *box; - pt->stride = sprex->stride[sr.level]; + pt->stride = spr->stride[sr.level]; - spr->offset = sprex->level_offset[sr.level]; - - if (resource->target == PIPE_TEXTURE_CUBE) { - spr->offset += sr.face * nblocksy * pt->stride; - } - else if (resource->target == PIPE_TEXTURE_3D) { - spr->offset += box->z * nblocksy * 
pt->stride; - } - else { - assert(sr.face == 0); - assert(box->z == 0); - } - - spr->offset += - box->y / util_format_get_blockheight(format) * spr->base.stride + + spt->offset = sp_get_tex_image_offset(spr, sr.level, sr.face, box->z); + + spt->offset += + box->y / util_format_get_blockheight(format) * spt->base.stride + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); return pt; @@ -378,26 +381,24 @@ static void * softpipe_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct softpipe_transfer *sp_transfer = softpipe_transfer(transfer); - struct softpipe_resource *sp_resource = softpipe_resource(transfer->resource); + struct softpipe_transfer *spt = softpipe_transfer(transfer); + struct softpipe_resource *spr = softpipe_resource(transfer->resource); struct sw_winsys *winsys = softpipe_screen(pipe->screen)->winsys; uint8_t *map; /* resources backed by display target treated specially: */ - if (sp_resource->dt) { - map = winsys->displaytarget_map(winsys, - sp_resource->dt, - transfer->usage); + if (spr->dt) { + map = winsys->displaytarget_map(winsys, spr->dt, transfer->usage); } else { - map = sp_resource->data; + map = spr->data; } if (map == NULL) return NULL; else - return map + sp_transfer->offset; + return map + spt->offset; } @@ -434,26 +435,25 @@ softpipe_user_buffer_create(struct pipe_screen *screen, unsigned bytes, unsigned bind_flags) { - struct softpipe_resource *buffer; + struct softpipe_resource *spr; - buffer = CALLOC_STRUCT(softpipe_resource); - if(!buffer) + spr = CALLOC_STRUCT(softpipe_resource); + if (!spr) return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = screen; - buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? 
*/ - buffer->base.bind = bind_flags; - buffer->base.usage = PIPE_USAGE_IMMUTABLE; - buffer->base.flags = 0; - buffer->base.width0 = bytes; - buffer->base.height0 = 1; - buffer->base.depth0 = 1; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; + pipe_reference_init(&spr->base.reference, 1); + spr->base.screen = screen; + spr->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */ + spr->base.bind = bind_flags; + spr->base.usage = PIPE_USAGE_IMMUTABLE; + spr->base.flags = 0; + spr->base.width0 = bytes; + spr->base.height0 = 1; + spr->base.depth0 = 1; + spr->userBuffer = TRUE; + spr->data = ptr; + + return &spr->base; } diff --git a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c index 15258c1966b..ba630582e59 100644 --- a/src/gallium/drivers/svga/svga_resource.c +++ b/src/gallium/drivers/svga/svga_resource.c @@ -14,7 +14,7 @@ svga_resource_create(struct pipe_screen *screen, if (template->target == PIPE_BUFFER) return svga_buffer_create(screen, template); else - return svga_resource_create(screen, template); + return svga_texture_create(screen, template); } @@ -26,7 +26,7 @@ svga_resource_from_handle(struct pipe_screen * screen, if (template->target == PIPE_BUFFER) return NULL; else - return svga_resource_from_handle(screen, template, whandle); + return svga_texture_from_handle(screen, template, whandle); } diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 3d4f56a67bd..7d7024c4a7d 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -49,9 +49,7 @@ translate_opcode( case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; - case TGSI_OPCODE_ENDFOR: return SVGA3DOP_ENDLOOP; case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; - case TGSI_OPCODE_BGNFOR: return SVGA3DOP_LOOP; case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; case TGSI_OPCODE_MAX: return 
SVGA3DOP_MAX; case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; @@ -2686,7 +2684,6 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || - emit->info.opcode_count[TGSI_OPCODE_BGNFOR] >= 1 || emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 3223e8d844c..a852ad97cab 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -269,7 +269,7 @@ enum pipe_transfer_usage { * - pipe_context::transfer_flush_region * - OpenGL's ARB_map_buffer_range extension, MAP_FLUSH_EXPLICIT_BIT flag. */ - PIPE_TRANSFER_FLUSH_EXPLICIT = (1 << 11), + PIPE_TRANSFER_FLUSH_EXPLICIT = (1 << 11) }; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index c5c480f1f0e..e21aaacc18a 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -276,12 +276,10 @@ struct tgsi_property_data { #define TGSI_OPCODE_TXL 72 #define TGSI_OPCODE_BRK 73 #define TGSI_OPCODE_IF 74 -#define TGSI_OPCODE_BGNFOR 75 -#define TGSI_OPCODE_REP 76 + /* gap */ #define TGSI_OPCODE_ELSE 77 #define TGSI_OPCODE_ENDIF 78 -#define TGSI_OPCODE_ENDFOR 79 -#define TGSI_OPCODE_ENDREP 80 + /* gap */ #define TGSI_OPCODE_PUSHA 81 #define TGSI_OPCODE_POPA 82 #define TGSI_OPCODE_CEIL 83 diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index 0e5ab5b5198..a808d2d9ddf 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -118,17 +118,15 @@ dri_destroy_context(__DRIcontext * cPriv) GLboolean dri_unbind_context(__DRIcontext * cPriv) { + /* dri_util.c ensures cPriv is not null */ 
struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); + struct dri_context *ctx = dri_context(cPriv); struct st_api *stapi = screen->st_api; - if (cPriv) { - struct dri_context *ctx = dri_context(cPriv); - - if (--ctx->bind_count == 0) { - if (ctx->st == stapi->get_current(stapi)) { - ctx->st->flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); - stapi->make_current(stapi, NULL, NULL, NULL); - } + if (--ctx->bind_count == 0) { + if (ctx->st == stapi->get_current(stapi)) { + ctx->st->flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + stapi->make_current(stapi, NULL, NULL, NULL); } } @@ -140,36 +138,30 @@ dri_make_current(__DRIcontext * cPriv, __DRIdrawable * driDrawPriv, __DRIdrawable * driReadPriv) { + /* dri_util.c ensures cPriv is not null */ struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); + struct dri_context *ctx = dri_context(cPriv); struct st_api *stapi = screen->st_api; + struct dri_drawable *draw = dri_drawable(driDrawPriv); + struct dri_drawable *read = dri_drawable(driReadPriv); + struct st_context_iface *old_st = stapi->get_current(stapi); - if (cPriv) { - struct dri_context *ctx = dri_context(cPriv); - struct dri_drawable *draw = dri_drawable(driDrawPriv); - struct dri_drawable *read = dri_drawable(driReadPriv); - struct st_context_iface *old_st; - - old_st = stapi->get_current(stapi); - if (old_st && old_st != ctx->st) - ctx->st->flush(old_st, PIPE_FLUSH_RENDER_CACHE, NULL); + if (old_st && old_st != ctx->st) + old_st->flush(old_st, PIPE_FLUSH_RENDER_CACHE, NULL); - ++ctx->bind_count; + ++ctx->bind_count; - if (ctx->dPriv != driDrawPriv) { - ctx->dPriv = driDrawPriv; - draw->texture_stamp = driDrawPriv->lastStamp - 1; - } - if (ctx->rPriv != driReadPriv) { - ctx->rPriv = driReadPriv; - read->texture_stamp = driReadPriv->lastStamp - 1; - } - - stapi->make_current(stapi, ctx->st, &draw->base, &read->base); + if (ctx->dPriv != driDrawPriv) { + ctx->dPriv = driDrawPriv; + draw->texture_stamp = driDrawPriv->lastStamp - 1; } - else { - 
stapi->make_current(stapi, NULL, NULL, NULL); + if (ctx->rPriv != driReadPriv) { + ctx->rPriv = driReadPriv; + read->texture_stamp = driReadPriv->lastStamp - 1; } + stapi->make_current(stapi, ctx->st, &draw->base, &read->base); + return GL_TRUE; } diff --git a/src/gallium/winsys/i965/drm/SConscript b/src/gallium/winsys/i965/drm/SConscript index 150ab19a33e..abf9aac5c01 100644 --- a/src/gallium/winsys/i965/drm/SConscript +++ b/src/gallium/winsys/i965/drm/SConscript @@ -2,6 +2,8 @@ Import('*') env = env.Clone() +env.ParseConfig('pkg-config --cflags libdrm') + i965drm_sources = [ 'i965_drm_api.c', 'i965_drm_buffer.c', diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript index fab42929514..60e409fe10f 100644 --- a/src/gallium/winsys/radeon/drm/SConscript +++ b/src/gallium/winsys/radeon/drm/SConscript @@ -8,6 +8,7 @@ radeon_sources = [ 'radeon_r300.c', ] +env.ParseConfig('pkg-config --cflags libdrm_radeon') env.Append(CPPPATH = '#/src/gallium/drivers/r300') radeonwinsys = env.ConvenienceLibrary( diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index cab41dba3d5..80923de9373 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -202,7 +202,7 @@ static void radeon_write_cs_dword(struct r300_winsys_screen *rws, } static void radeon_write_cs_table(struct r300_winsys_screen *rws, - void *table, unsigned count) + const void *table, unsigned count) { struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); radeon_cs_write_table(ws->cs, table, count); diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c index 90ffc4868f7..104d03f2730 100644 --- a/src/gallium/winsys/svga/drm/vmw_context.c +++ b/src/gallium/winsys/svga/drm/vmw_context.c @@ -114,6 +114,19 @@ vmw_svga_winsys_context(struct svga_winsys_context *swc) } +static INLINE unsigned +vmw_translate_to_pb_flags(unsigned 
flags) +{ + unsigned f = 0; + if (flags & SVGA_RELOC_READ) + f |= PB_USAGE_GPU_READ; + + if (flags & SVGA_RELOC_WRITE) + f |= PB_USAGE_GPU_WRITE; + + return f; +} + static enum pipe_error vmw_swc_flush(struct svga_winsys_context *swc, struct pipe_fence_handle **pfence) @@ -264,6 +277,7 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc, { struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); struct vmw_region_relocation *reloc; + unsigned translated_flags; enum pipe_error ret; assert(vswc->region.staged < vswc->region.reserved); @@ -275,7 +289,8 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc, ++vswc->region.staged; - ret = pb_validate_add_buffer(vswc->validate, reloc->buffer, flags); + translated_flags = vmw_translate_to_pb_flags(flags); + ret = pb_validate_add_buffer(vswc->validate, reloc->buffer, translated_flags); /* TODO: Update pipebuffer to reserve buffers and not fail here */ assert(ret == PIPE_OK); |