diff options
Diffstat (limited to 'src/gallium')
698 files changed, 23991 insertions, 26176 deletions
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 136423513c6..5d9d2db7866 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -53,13 +53,16 @@ install: ##### RULES ##### -.c.o: +%.s: %.c + $(CC) -S $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +%.o: %.c $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ -.cpp.o: +%.o: %.cpp $(CXX) -c $(INCLUDES) $(DEFINES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ -.S.o: +%.o: %.S $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ diff --git a/src/gallium/SConscript b/src/gallium/SConscript index eea32b1314b..d56c5c84617 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -8,9 +8,10 @@ for driver in env['drivers']: SConscript(os.path.join('drivers', driver, 'SConscript')) SConscript('state_trackers/python/SConscript') -SConscript('state_trackers/glx/xlib/SConscript') -SConscript('state_trackers/dri/SConscript') -SConscript('state_trackers/xorg/SConscript') +if platform != 'embedded': + SConscript('state_trackers/glx/xlib/SConscript') + SConscript('state_trackers/dri/SConscript') + SConscript('state_trackers/xorg/SConscript') if platform == 'windows': SConscript('state_trackers/wgl/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index e3af41c6e04..02c65a9b2d9 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -48,12 +48,14 @@ C_SOURCES = \ draw/draw_vs_sse.c \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ - pipebuffer/pb_buffer_fenced.c \ + os/os_misc.c \ + os/os_stream_stdc.c \ + os/os_stream_wd.c \ + os/os_time.c \ pipebuffer/pb_buffer_malloc.c \ pipebuffer/pb_bufmgr_alt.c \ pipebuffer/pb_bufmgr_cache.c \ pipebuffer/pb_bufmgr_debug.c \ - pipebuffer/pb_bufmgr_fenced.c \ pipebuffer/pb_bufmgr_mm.c \ pipebuffer/pb_bufmgr_ondemand.c \ pipebuffer/pb_bufmgr_pool.c \ @@ -92,6 +94,7 @@ C_SOURCES = \ util/u_debug_dump.c \ 
util/u_debug_symbol.c \ util/u_debug_stack.c \ + util/u_bitmask.c \ util/u_blit.c \ util/u_blitter.c \ util/u_cache.c \ @@ -111,14 +114,12 @@ C_SOURCES = \ util/u_math.c \ util/u_mm.c \ util/u_rect.c \ + util/u_ringbuffer.c \ util/u_simple_shaders.c \ util/u_snprintf.c \ - util/u_stream_stdc.c \ - util/u_stream_wd.c \ util/u_surface.c \ util/u_texture.c \ util/u_tile.c \ - util/u_time.c \ util/u_timed_winsys.c \ util/u_upload_mgr.c \ util/u_simple_screen.c \ @@ -129,39 +130,43 @@ C_SOURCES = \ vl/vl_shader_build.c GALLIVM_SOURCES = \ - gallivm/gallivm.cpp \ - gallivm/gallivm_cpu.cpp \ - gallivm/instructions.cpp \ - gallivm/loweringpass.cpp \ - gallivm/tgsitollvm.cpp \ - gallivm/storage.cpp \ - gallivm/storagesoa.cpp \ - gallivm/instructionssoa.cpp + gallivm/lp_bld_alpha.c \ + gallivm/lp_bld_arit.c \ + gallivm/lp_bld_blend_aos.c \ + gallivm/lp_bld_blend_logicop.c \ + gallivm/lp_bld_blend_soa.c \ + gallivm/lp_bld_const.c \ + gallivm/lp_bld_conv.c \ + gallivm/lp_bld_debug.c \ + gallivm/lp_bld_depth.c \ + gallivm/lp_bld_flow.c \ + gallivm/lp_bld_format_aos.c \ + gallivm/lp_bld_format_query.c \ + gallivm/lp_bld_format_soa.c \ + gallivm/lp_bld_interp.c \ + gallivm/lp_bld_intr.c \ + gallivm/lp_bld_logic.c \ + gallivm/lp_bld_pack.c \ + gallivm/lp_bld_sample.c \ + gallivm/lp_bld_sample_soa.c \ + gallivm/lp_bld_struct.c \ + gallivm/lp_bld_swizzle.c \ + gallivm/lp_bld_tgsi_soa.c \ + gallivm/lp_bld_type.c -INC_SOURCES = \ - gallivm/gallivm_builtins.cpp \ - gallivm/gallivmsoabuiltins.cpp +GALLIVM_CPP_SOURCES = \ + gallivm/lp_bld_misc.cpp -# XXX: gallivm doesn't build correctly so disable for now -#ifeq ($(MESA_LLVM),1) -#DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS -#CPP_SOURCES += \ -# $(GALLIVM_SOURCES) -#endif - -include ../Makefile.template +ifeq ($(MESA_LLVM),1) +C_SOURCES += \ + $(GALLIVM_SOURCES) +CPP_SOURCES += \ + $(GALLIVM_CPP_SOURCES) +endif -gallivm/gallivm_builtins.cpp: gallivm/llvm_builtins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > 
temp1.bin - (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp1.bin - -gallivm/gallivmsoabuiltins.cpp: gallivm/soabuiltins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin - (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp2.bin +include ../Makefile.template indices/u_indices_gen.c: indices/u_indices_gen.py diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 782eb533863..9709344b546 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -82,12 +82,15 @@ source = [ #'indices/u_unfilled_indices.c', 'indices/u_indices_gen.c', 'indices/u_unfilled_gen.c', + 'os/os_misc.c', + 'os/os_stream_stdc.c', + 'os/os_stream_wd.c', + 'os/os_time.c', 'pipebuffer/pb_buffer_fenced.c', 'pipebuffer/pb_buffer_malloc.c', 'pipebuffer/pb_bufmgr_alt.c', 'pipebuffer/pb_bufmgr_cache.c', 'pipebuffer/pb_bufmgr_debug.c', - 'pipebuffer/pb_bufmgr_fenced.c', 'pipebuffer/pb_bufmgr_mm.c', 'pipebuffer/pb_bufmgr_ondemand.c', 'pipebuffer/pb_bufmgr_pool.c', @@ -106,7 +109,6 @@ source = [ 'rtasm/rtasm_ppc_spe.c', 'tgsi/tgsi_build.c', 'tgsi/tgsi_dump.c', - 'tgsi/tgsi_dump_c.c', 'tgsi/tgsi_exec.c', 'tgsi/tgsi_info.c', 'tgsi/tgsi_iterate.c', @@ -147,14 +149,12 @@ source = [ 'util/u_math.c', 'util/u_mm.c', 'util/u_rect.c', + 'util/u_ringbuffer.c', 'util/u_simple_shaders.c', 'util/u_snprintf.c', - 'util/u_stream_stdc.c', - 'util/u_stream_wd.c', 'util/u_surface.c', 'util/u_texture.c', 'util/u_tile.c', - 'util/u_time.c', 'util/u_timed_winsys.c', 'util/u_upload_mgr.c', 'util/u_simple_screen.c', @@ -165,16 +165,32 @@ source = [ 'vl/vl_shader_build.c', ] -if env['llvm']: +if drawllvm: source += [ - 'gallivm/gallivm.cpp', - 'gallivm/gallivm_cpu.cpp', - 
'gallivm/instructions.cpp', - 'gallivm/loweringpass.cpp', - 'gallivm/tgsitollvm.cpp', - 'gallivm/storage.cpp', - 'gallivm/storagesoa.cpp', - 'gallivm/instructionssoa.cpp', + 'gallivm/lp_bld_alpha.c', + 'gallivm/lp_bld_arit.c', + 'gallivm/lp_bld_blend_aos.c', + 'gallivm/lp_bld_blend_logicop.c', + 'gallivm/lp_bld_blend_soa.c', + 'gallivm/lp_bld_const.c', + 'gallivm/lp_bld_conv.c', + 'gallivm/lp_bld_debug.c', + 'gallivm/lp_bld_depth.c', + 'gallivm/lp_bld_flow.c', + 'gallivm/lp_bld_format_aos.c', + 'gallivm/lp_bld_format_query.c', + 'gallivm/lp_bld_format_soa.c', + 'gallivm/lp_bld_interp.c', + 'gallivm/lp_bld_intr.c', + 'gallivm/lp_bld_logic.c', + 'gallivm/lp_bld_misc.cpp', + 'gallivm/lp_bld_pack.c', + 'gallivm/lp_bld_sample.c', + 'gallivm/lp_bld_sample_soa.c', + 'gallivm/lp_bld_struct.c', + 'gallivm/lp_bld_swizzle.c', + 'gallivm/lp_bld_tgsi_soa.c', + 'gallivm/lp_bld_type.c', ] gallium = env.ConvenienceLibrary( diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index e6dce3f0e5b..a6a07e72c2f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -113,26 +113,6 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_ return hash; } -static int _cso_size_for_type(enum cso_cache_type type) -{ - switch(type) { - case CSO_BLEND: - return sizeof(struct pipe_blend_state); - case CSO_SAMPLER: - return sizeof(struct pipe_sampler_state); - case CSO_DEPTH_STENCIL_ALPHA: - return sizeof(struct pipe_depth_stencil_alpha_state); - case CSO_RASTERIZER: - return sizeof(struct pipe_rasterizer_state); - case CSO_FRAGMENT_SHADER: - return sizeof(struct pipe_shader_state); - case CSO_VERTEX_SHADER: - return sizeof(struct pipe_shader_state); - } - return 0; -} - - static void delete_blend_state(void *state, void *data) { struct cso_blend *cso = (struct cso_blend *)state; @@ -282,10 +262,9 @@ void *cso_hash_find_data_from_template( struct cso_hash *hash, 
struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, - void *templ) + void *templ, unsigned size) { struct cso_hash_iter iter = cso_find_state(sc, hash_key, type); - int size = _cso_size_for_type(type); while (!cso_hash_iter_is_null(iter)) { void *iter_data = cso_hash_iter_data(iter); if (!memcmp(iter_data, templ, size)) diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 6b5c230e8f2..eea60b940bb 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -160,7 +160,7 @@ struct cso_hash_iter cso_find_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type); struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, - void *templ); + void *templ, unsigned size); void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, cso_state_callback func, void *user_data); void * cso_take_state(struct cso_cache *sc, unsigned hash_key, diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 2b16332e143..c638239e80b 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -36,6 +36,7 @@ */ #include "pipe/p_state.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "tgsi/tgsi_parse.h" @@ -310,18 +311,21 @@ void cso_destroy_context( struct cso_context *ctx ) enum pipe_error cso_set_blend(struct cso_context *ctx, const struct pipe_blend_state *templ) { - unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_BLEND, - (void*)templ); + unsigned key_size, hash_key; + struct cso_hash_iter iter; void *handle; + key_size = templ->independent_blend_enable ? 
sizeof(struct pipe_blend_state) : + (char *)&(templ->rt[1]) - (char *)templ; + hash_key = cso_construct_key((void*)templ, key_size); + iter = cso_find_state_template(ctx->cache, hash_key, CSO_BLEND, (void*)templ, key_size); + if (cso_hash_iter_is_null(iter)) { struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - memcpy(&cso->state, templ, sizeof(*templ)); + memcpy(&cso->state, templ, key_size); cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; cso->context = ctx->pipe; @@ -369,10 +373,11 @@ enum pipe_error cso_single_sampler(struct cso_context *ctx, void *handle = NULL; if (templ != NULL) { - unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + unsigned key_size = sizeof(struct pipe_sampler_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, CSO_SAMPLER, - (void*)templ); + (void*)templ, key_size); if (cso_hash_iter_is_null(iter)) { struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); @@ -409,10 +414,11 @@ cso_single_vertex_sampler(struct cso_context *ctx, void *handle = NULL; if (templ != NULL) { - unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + unsigned key_size = sizeof(struct pipe_sampler_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, CSO_SAMPLER, - (void*)templ); + (void*)templ, key_size); if (cso_hash_iter_is_null(iter)) { struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); @@ -539,6 +545,38 @@ void cso_restore_samplers(struct cso_context *ctx) cso_single_sampler_done( ctx ); } +/* + * If the function encouters any errors it will return the + * last one. Done to always try to set as many samplers + * as possible. 
+ */ +enum pipe_error cso_set_vertex_samplers(struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + unsigned i; + enum pipe_error temp, error = PIPE_OK; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) { + temp = cso_single_vertex_sampler( ctx, i, templates[i] ); + if (temp != PIPE_OK) + error = temp; + } + + for ( ; i < ctx->nr_samplers; i++) { + temp = cso_single_vertex_sampler( ctx, i, NULL ); + if (temp != PIPE_OK) + error = temp; + } + + cso_single_vertex_sampler_done( ctx ); + + return error; +} + void cso_save_vertex_samplers(struct cso_context *ctx) { @@ -666,12 +704,12 @@ cso_restore_vertex_sampler_textures(struct cso_context *ctx) enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, const struct pipe_depth_stencil_alpha_state *templ) { - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_depth_stencil_alpha_state)); + unsigned key_size = sizeof(struct pipe_depth_stencil_alpha_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, - CSO_DEPTH_STENCIL_ALPHA, - (void*)templ); + CSO_DEPTH_STENCIL_ALPHA, + (void*)templ, key_size); void *handle; if (cso_hash_iter_is_null(iter)) { @@ -723,11 +761,11 @@ void cso_restore_depth_stencil_alpha(struct cso_context *ctx) enum pipe_error cso_set_rasterizer(struct cso_context *ctx, const struct pipe_rasterizer_state *templ) { - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_rasterizer_state)); + unsigned key_size = sizeof(struct pipe_rasterizer_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, CSO_RASTERIZER, - (void*)templ); + (void*)templ, key_size); void *handle = NULL; if (cso_hash_iter_is_null(iter)) { @@ -809,7 +847,8 @@ enum pipe_error cso_set_fragment_shader(struct cso_context *ctx, struct cso_hash_iter iter = 
cso_find_state_template(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, - (void*)tokens); + (void*)tokens, + sizeof(*templ)); /* XXX correct? tokens_size? */ void *handle = NULL; if (cso_hash_iter_is_null(iter)) { @@ -888,7 +927,8 @@ enum pipe_error cso_set_vertex_shader(struct cso_context *ctx, sizeof(struct pipe_shader_state)); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, CSO_VERTEX_SHADER, - (void*)templ); + (void*)templ, + sizeof(*templ)); void *handle = NULL; if (cso_hash_iter_is_null(iter)) { diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index b9e313e32d6..d2089b1c883 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -84,6 +84,10 @@ enum pipe_error cso_single_sampler( struct cso_context *cso, void cso_single_sampler_done( struct cso_context *cso ); +enum pipe_error cso_set_vertex_samplers(struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states); + void cso_save_vertex_samplers(struct cso_context *cso); diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 667aa46b208..d5ddc4a6a92 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -34,11 +34,8 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "draw_context.h" -#include "draw_vbuf.h" #include "draw_vs.h" #include "draw_gs.h" -#include "draw_pt.h" -#include "draw_pipe.h" struct draw_context *draw_create( void ) @@ -95,6 +92,7 @@ void draw_destroy( struct draw_context *draw ) draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); draw_vs_destroy( draw ); + draw_gs_destroy( draw ); FREE( draw ); } @@ -236,17 +234,20 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw, void draw_set_mapped_constant_buffer(struct draw_context *draw, unsigned shader_type, + unsigned slot, const void *buffer, unsigned size ) { 
debug_assert(shader_type == PIPE_SHADER_VERTEX || shader_type == PIPE_SHADER_GEOMETRY); + debug_assert(slot < PIPE_MAX_CONSTANT_BUFFERS); + if (shader_type == PIPE_SHADER_VERTEX) { - draw->pt.user.vs_constants = buffer; - draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); + draw->pt.user.vs_constants[slot] = buffer; + draw_vs_set_constants(draw, slot, buffer, size); } else if (shader_type == PIPE_SHADER_GEOMETRY) { - draw->pt.user.gs_constants = buffer; - draw_gs_set_constants( draw, (const float (*)[4])buffer, size ); + draw->pt.user.gs_constants[slot] = buffer; + draw_gs_set_constants(draw, slot, buffer, size); } } @@ -351,7 +352,10 @@ draw_find_shader_output(const struct draw_context *draw, /** - * Return number of the shader outputs. + * Return total number of the shader outputs. This function is similar to + * draw_current_shader_outputs() but this function also counts any extra + * vertex/geometry output attributes that may be filled in by some draw + * stages (such as AA point, AA line). * * If geometry shader is present, its output will be returned, * if not vertex shader is used. @@ -361,8 +365,9 @@ draw_num_shader_outputs(const struct draw_context *draw) { uint count = draw->vs.vertex_shader->info.num_outputs; - /* if geometry shader is present, its outputs go to te - * driver, not the vertex shaders */ + /* If a geometry shader is present, its outputs go to the + * driver, else the vertex shader's outputs. + */ if (draw->gs.geometry_shader) count = draw->gs.geometry_shader->info.num_outputs; @@ -373,7 +378,8 @@ draw_num_shader_outputs(const struct draw_context *draw) /** - * Provide TGSI sampler objects for vertex/geometry shaders that use texture fetches. + * Provide TGSI sampler objects for vertex/geometry shaders that use + * texture fetches. * This might only be used by software drivers for the time being. 
*/ void @@ -453,14 +459,27 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) } -int draw_current_shader_outputs(struct draw_context *draw) +/** + * Return the number of output attributes produced by the geometry + * shader, if present. If no geometry shader, return the number of + * outputs from the vertex shader. + * \sa draw_num_shader_outputs + */ +uint +draw_current_shader_outputs(const struct draw_context *draw) { if (draw->gs.geometry_shader) return draw->gs.num_gs_outputs; return draw->vs.num_vs_outputs; } -int draw_current_shader_position_output(struct draw_context *draw) + +/** + * Return the index of the shader output which will contain the + * vertex position. + */ +uint +draw_current_shader_position_output(const struct draw_context *draw) { if (draw->gs.geometry_shader) return draw->gs.position_output; diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index b716209df29..acd81b9712d 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -151,10 +151,12 @@ void draw_set_mapped_element_buffer( struct draw_context *draw, void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); -void draw_set_mapped_constant_buffer(struct draw_context *draw, - unsigned shader_type, - const void *buffer, - unsigned size ); +void +draw_set_mapped_constant_buffer(struct draw_context *draw, + unsigned shader_type, + unsigned slot, + const void *buffer, + unsigned size); /*********************************************************************** @@ -164,6 +166,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count); +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void draw_flush(struct draw_context *draw); diff 
--git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 5db2e755423..7069aa6b181 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -59,10 +59,21 @@ draw_gs_init( struct draw_context *draw ) return TRUE; } +void draw_gs_destroy( struct draw_context *draw ) +{ + if (!draw->gs.machine) + return; + + align_free(draw->gs.machine->Primitives); -void draw_gs_set_constants( struct draw_context *draw, - const float (*constants)[4], - unsigned size ) + tgsi_exec_machine_destroy(draw->gs.machine); +} + +void +draw_gs_set_constants(struct draw_context *draw, + unsigned slot, + const void *constants, + unsigned size) { } @@ -282,7 +293,7 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, void draw_geometry_shader_run(struct draw_geometry_shader *shader, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned vertex_size) @@ -293,7 +304,9 @@ void draw_geometry_shader_run(struct draw_geometry_shader *shader, unsigned num_primitives = count/num_vertices; unsigned inputs_from_vs = 0; - machine->Consts = constants; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->Consts[i] = constants[i]; + } for (i = 0; i < shader->info.num_inputs; ++i) { if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID) diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index d6a97d9c4ef..d8eb2103433 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -62,7 +62,7 @@ struct draw_geometry_shader { void draw_geometry_shader_run(struct draw_geometry_shader *shader, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride); diff --git 
a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 1c6d657297c..83dc1a35f4c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -32,6 +32,7 @@ #include "draw/draw_private.h" #include "draw/draw_pipe.h" +#include "util/u_debug.h" @@ -106,10 +107,9 @@ void draw_pipeline_destroy( struct draw_context *draw ) - - - - +/** + * Build primitive to render a point with vertex at v0. + */ static void do_point( struct draw_context *draw, const char *v0 ) { @@ -123,6 +123,10 @@ static void do_point( struct draw_context *draw, } +/** + * Build primitive to render a line with vertices at v0, v1. + * \param flags bitmask of DRAW_PIPE_EDGE_x, DRAW_PIPE_RESET_STIPPLE + */ static void do_line( struct draw_context *draw, ushort flags, const char *v0, @@ -139,6 +143,10 @@ static void do_line( struct draw_context *draw, } +/** + * Build primitive to render a triangle with vertices at v0, v1, v2. + * \param flags bitmask of DRAW_PIPE_EDGE_x, DRAW_PIPE_RESET_STIPPLE + */ static void do_triangle( struct draw_context *draw, ushort flags, char *v0, @@ -157,7 +165,10 @@ static void do_triangle( struct draw_context *draw, } - +/* + * Set up macros for draw_pt_decompose.h template code. + * This code uses vertex indexes / elements. 
+ */ #define QUAD(i0,i1,i2,i3) \ do_triangle( draw, \ ( DRAW_PIPE_RESET_STIPPLE | \ @@ -175,16 +186,16 @@ static void do_triangle( struct draw_context *draw, #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ - elts[i0], /* flags */ \ + elts[i0], /* flags */ \ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * elts[i1], \ - verts + stride * elts[i2]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK) ); #define LINE(flags,i0,i1) \ do_line( draw, \ - elts[i0], \ + elts[i0], \ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * elts[i1]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK) ); #define POINT(i0) \ do_point( draw, \ @@ -213,7 +224,9 @@ static void do_triangle( struct draw_context *draw, -/* Code to run the pipeline on a fairly arbitary collection of vertices. +/** + * Code to run the pipeline on a fairly arbitary collection of vertices. + * For drawing indexed primitives. * * Vertex headers must be pre-initialized with the * UNDEFINED_VERTEX_ID, this code will cause that id to become @@ -243,6 +256,12 @@ void draw_pipeline_run( struct draw_context *draw, draw->pipeline.vertex_count = 0; } + + +/* + * Set up macros for draw_pt_decompose.h template code. + * This code is for non-indexed rendering (no elts). + */ #define QUAD(i0,i1,i2,i3) \ do_triangle( draw, \ ( DRAW_PIPE_RESET_STIPPLE | \ @@ -293,6 +312,10 @@ void draw_pipeline_run( struct draw_context *draw, #include "draw_pt_decompose.h" + +/* + * For drawing non-indexed primitives. 
+ */ void draw_pipeline_run_linear( struct draw_context *draw, unsigned prim, struct vertex_header *vertices, diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 4585dcdb48a..8f6ca15dfa2 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -35,6 +35,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" @@ -48,6 +49,10 @@ #include "draw_pipe.h" +/** Approx number of new tokens for instructions in aa_transform_inst() */ +#define NUM_NEW_TOKENS 50 + + /** * Max texture level for the alpha texture used for antialiasing */ @@ -178,12 +183,7 @@ aa_transform_decl(struct tgsi_transform_context *ctx, static int free_bit(uint bitfield) { - int i; - for (i = 0; i < 32; i++) { - if ((bitfield & (1 << i)) == 0) - return i; - } - return -1; + return ffs(~bitfield) - 1; } @@ -342,11 +342,10 @@ generate_aaline_fs(struct aaline_stage *aaline) const struct pipe_shader_state *orig_fs = &aaline->fs->state; struct pipe_shader_state aaline_fs; struct aa_transform_context transform; - -#define MAX 1000 + const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS; aaline_fs = *orig_fs; /* copy to init */ - aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + aaline_fs.tokens = tgsi_alloc_tokens(newLen); if (aaline_fs.tokens == NULL) return FALSE; @@ -362,7 +361,7 @@ generate_aaline_fs(struct aaline_stage *aaline) tgsi_transform_shader(orig_fs->tokens, (struct tgsi_token *) aaline_fs.tokens, - MAX, &transform.base); + newLen, &transform.base); #if 0 /* DEBUG */ tgsi_dump(orig_fs->tokens, 0); diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index d86717e5182..97f34808793 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ 
b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -53,6 +53,10 @@ #include "draw_pipe.h" +/** Approx number of new tokens for instructions in aa_transform_inst() */ +#define NUM_NEW_TOKENS 200 + + /* * Enabling NORMALIZE might give _slightly_ better results. * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or @@ -81,16 +85,19 @@ struct aapoint_stage { struct draw_stage stage; - int psize_slot; + /** half of pipe_rasterizer_state::point_size */ float radius; + /** vertex attrib slot containing point size */ + int psize_slot; + /** this is the vertex attrib slot for the new texcoords */ uint tex_slot; + + /** vertex attrib slot containing position */ uint pos_slot; - /* - * Currently bound state - */ + /** Currently bound fragment shader */ struct aapoint_fragment_shader *fs; /* @@ -491,11 +498,10 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) const struct pipe_shader_state *orig_fs = &aapoint->fs->state; struct pipe_shader_state aapoint_fs; struct aa_transform_context transform; - -#define MAX 1000 + const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS; aapoint_fs = *orig_fs; /* copy to init */ - aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + aapoint_fs.tokens = tgsi_alloc_tokens(newLen); if (aapoint_fs.tokens == NULL) return FALSE; @@ -511,7 +517,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) tgsi_transform_shader(orig_fs->tokens, (struct tgsi_token *) aapoint_fs.tokens, - MAX, &transform.base); + newLen, &transform.base); #if 0 /* DEBUG */ printf("draw_aapoint, orig shader:\n"); @@ -575,8 +581,8 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header) const struct aapoint_stage *aapoint = aapoint_stage(stage); struct prim_header tri; struct vertex_header *v[4]; - uint texPos = aapoint->tex_slot; - uint pos_slot = aapoint->pos_slot; + const uint tex_slot = aapoint->tex_slot; + const uint pos_slot = aapoint->pos_slot; float radius, *pos, *tex; uint i; float k; @@ -643,16 +649,16 
@@ aapoint_point(struct draw_stage *stage, struct prim_header *header) pos[1] += radius; /* new texcoords */ - tex = v[0]->data[texPos]; + tex = v[0]->data[tex_slot]; ASSIGN_4V(tex, -1, -1, k, 1); - tex = v[1]->data[texPos]; + tex = v[1]->data[tex_slot]; ASSIGN_4V(tex, 1, -1, k, 1); - tex = v[2]->data[texPos]; + tex = v[2]->data[tex_slot]; ASSIGN_4V(tex, 1, 1, k, 1); - tex = v[3]->data[texPos]; + tex = v[3]->data[tex_slot]; ASSIGN_4V(tex, -1, 1, k, 1); /* emit 2 tris for the quad strip */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 205cda5eabe..51a6115ebf5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -55,7 +55,7 @@ -struct clipper { +struct clip_stage { struct draw_stage stage; /**< base class */ /* Basically duplicate some of the flatshading logic here: @@ -70,9 +70,9 @@ struct clipper { /* This is a bit confusing: */ -static INLINE struct clipper *clipper_stage( struct draw_stage *stage ) +static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) { - return (struct clipper *)stage; + return (struct clip_stage *)stage; } @@ -92,11 +92,12 @@ static void interp_attr( float *fdst, fdst[3] = LINTERP( t, fout[3], fin[3] ); } + static void copy_colors( struct draw_stage *stage, struct vertex_header *dst, const struct vertex_header *src ) { - const struct clipper *clipper = clipper_stage(stage); + const struct clip_stage *clipper = clip_stage(stage); uint i; for (i = 0; i < clipper->num_color_attribs; i++) { const uint attr = clipper->color_attribs[i]; @@ -108,7 +109,7 @@ static void copy_colors( struct draw_stage *stage, /* Interpolate between two vertices to produce a third. 
*/ -static void interp( const struct clipper *clip, +static void interp( const struct clip_stage *clip, struct vertex_header *dst, float t, const struct vertex_header *out, @@ -179,7 +180,7 @@ static void emit_poly( struct draw_stage *stage, header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ if (i == n-1) - header.flags |= edge_last; + header.flags |= edge_last; if (0) { const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; @@ -200,13 +201,14 @@ static void emit_poly( struct draw_stage *stage, } } + static INLINE float dot4(const float *a, const float *b) { - return (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); + return (a[0] * b[0] + + a[1] * b[1] + + a[2] * b[2] + + a[3] * b[3]); } @@ -217,7 +219,7 @@ do_clip_tri( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *a[MAX_CLIPPED_VERTICES]; struct vertex_header *b[MAX_CLIPPED_VERTICES]; struct vertex_header **inlist = a; @@ -280,6 +282,7 @@ do_clip_tri( struct draw_stage *stage, dp_prev = dp; } + /* swap in/out lists */ { struct vertex_header **tmp = inlist; inlist = outlist; @@ -291,15 +294,11 @@ do_clip_tri( struct draw_stage *stage, /* If flat-shading, copy color to new provoking vertex. 
*/ if (clipper->flat && inlist[0] != header->v[2]) { - if (1) { - inlist[0] = dup_vert(stage, inlist[0], tmpnr++); - } + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); copy_colors(stage, inlist[0], header->v[2]); } - - /* Emit the polygon as triangles to the setup stage: */ if (n >= 3) @@ -314,7 +313,7 @@ do_clip_line( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - const struct clipper *clipper = clipper_stage( stage ); + const struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; const float *pos0 = v0->clip; @@ -416,13 +415,14 @@ clip_tri( struct draw_stage *stage, } } + /* Update state. Could further delay this until we hit the first * primitive that really requires clipping. */ static void clip_init_state( struct draw_stage *stage ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); clipper->flat = stage->draw->rasterizer->flatshade ? 
TRUE : FALSE; @@ -488,7 +488,7 @@ static void clip_destroy( struct draw_stage *stage ) */ struct draw_stage *draw_clip_stage( struct draw_context *draw ) { - struct clipper *clipper = CALLOC_STRUCT(clipper); + struct clip_stage *clipper = CALLOC_STRUCT(clip_stage); if (clipper == NULL) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 11b39db5990..dc66c65a56c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -50,8 +50,6 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) } - - static void cull_tri( struct draw_stage *stage, struct prim_header *header ) { @@ -62,7 +60,7 @@ static void cull_tri( struct draw_stage *stage, const float *v1 = header->v[1]->data[pos]; const float *v2 = header->v[2]->data[pos]; - /* edge vectors e = v0 - v2, f = v1 - v2 */ + /* edge vectors: e = v0 - v2, f = v1 - v2 */ const float ex = v0[0] - v2[0]; const float ey = v0[1] - v2[1]; const float fx = v1[0] - v2[0]; @@ -72,7 +70,7 @@ static void cull_tri( struct draw_stage *stage, header->det = ex * fy - ey * fx; if (header->det != 0) { - /* if (det < 0 then Z points toward camera and triangle is + /* if det < 0 then Z points toward the camera and the triangle is * counter-clockwise winding. */ unsigned winding = (header->det < 0) ? 
PIPE_WINDING_CCW : PIPE_WINDING_CW; @@ -84,6 +82,7 @@ static void cull_tri( struct draw_stage *stage, } } + static void cull_first_tri( struct draw_stage *stage, struct prim_header *header ) { @@ -96,13 +95,13 @@ static void cull_first_tri( struct draw_stage *stage, } - static void cull_flush( struct draw_stage *stage, unsigned flags ) { stage->tri = cull_first_tri; stage->next->flush( stage->next, flags ); } + static void cull_reset_stipple_counter( struct draw_stage *stage ) { stage->next->reset_stipple_counter( stage->next ); @@ -140,7 +139,7 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) return &cull->stage; - fail: +fail: if (cull) cull->stage.destroy( &cull->stage ); diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 0cc2b718641..d0d99aa331a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -37,6 +37,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" @@ -49,6 +50,9 @@ #include "draw_pipe.h" +/** Approx number of new tokens for instructions in pstip_transform_inst() */ +#define NUM_NEW_TOKENS 50 + /** * Subclass of pipe_shader_state to carry extra fragment shader info. 
@@ -171,12 +175,7 @@ pstip_transform_immed(struct tgsi_transform_context *ctx, static int free_bit(uint bitfield) { - int i; - for (i = 0; i < 32; i++) { - if ((bitfield & (1 << i)) == 0) - return i; - } - return -1; + return ffs(~bitfield) - 1; } @@ -332,11 +331,10 @@ generate_pstip_fs(struct pstip_stage *pstip) /*struct draw_context *draw = pstip->stage.draw;*/ struct pipe_shader_state pstip_fs; struct pstip_transform_context transform; - -#define MAX 1000 + const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS; pstip_fs = *orig_fs; /* copy to init */ - pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + pstip_fs.tokens = tgsi_alloc_tokens(newLen); if (pstip_fs.tokens == NULL) return FALSE; @@ -351,7 +349,7 @@ generate_pstip_fs(struct pstip_stage *pstip) tgsi_transform_shader(orig_fs->tokens, (struct tgsi_token *) pstip_fs.tokens, - MAX, &transform.base); + newLen, &transform.base); #if 0 /* DEBUG */ tgsi_dump(orig_fs->tokens, 0); diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index ac29634d677..153097e543e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -151,8 +151,8 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) { struct draw_context *draw = stage->draw; struct draw_stage *next = draw->pipeline.rasterize; - int need_det = 0; - int precalc_flat = 0; + boolean need_det = FALSE; + boolean precalc_flat = FALSE; boolean wide_lines, wide_points; /* Set the validate's next stage to the rasterize stage, so that it @@ -194,7 +194,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) if (wide_lines) { draw->pipeline.wide_line->next = next; next = draw->pipeline.wide_line; - precalc_flat = 1; + precalc_flat = TRUE; } if (wide_points || draw->rasterizer->sprite_coord_enable) { @@ -205,7 +205,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) 
if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) { draw->pipeline.stipple->next = next; next = draw->pipeline.stipple; - precalc_flat = 1; /* only needed for lines really */ + precalc_flat = TRUE; /* only needed for lines really */ } if (draw->rasterizer->poly_stipple_enable @@ -218,8 +218,8 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { draw->pipeline.unfilled->next = next; next = draw->pipeline.unfilled; - precalc_flat = 1; /* only needed for triangles really */ - need_det = 1; + precalc_flat = TRUE; /* only needed for triangles really */ + need_det = TRUE; } if (draw->rasterizer->flatshade && precalc_flat) { @@ -231,13 +231,13 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) draw->rasterizer->offset_ccw) { draw->pipeline.offset->next = next; next = draw->pipeline.offset; - need_det = 1; + need_det = TRUE; } if (draw->rasterizer->light_twoside) { draw->pipeline.twoside->next = next; next = draw->pipeline.twoside; - need_det = 1; + need_det = TRUE; } /* Always run the cull stage as we calculate determinant there diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 1a5269c0de9..d40c0352401 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf, /* Note: we really do want data[0] here, not data[pos]: */ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); - vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr); if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); @@ -271,10 +271,12 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = 
PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index e49041556bd..1e6e01af9e2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -48,8 +48,6 @@ struct pipe_context; -struct gallivm_prog; -struct gallivm_cpu_engine; struct draw_vertex_shader; struct draw_context; struct draw_stage; @@ -153,8 +151,8 @@ struct draw_context const void *vbuffer[PIPE_MAX_ATTRIBS]; /** constant buffer (for vertex/geometry shader) */ - const void *vs_constants; - const void *gs_constants; + const void *vs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS]; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ @@ -172,6 +170,8 @@ struct draw_context boolean force_passthrough; /**< never clip or shade */ + boolean dump_vs; + double mrd; /**< minimum resolvable depth value, for polygon offset */ /* pipe state that we need: */ @@ -191,19 +191,15 @@ struct draw_context uint num_samplers; struct tgsi_sampler **samplers; - /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. 
- */ - struct gallivm_cpu_engine *engine; - /* Here's another one: */ struct aos_machine *aos_machine; - const float (*aligned_constants)[4]; + const void *aligned_constants[PIPE_MAX_CONSTANT_BUFFERS]; - const float (*aligned_constant_storage)[4]; - unsigned const_storage_size; + const void *aligned_constant_storage[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned const_storage_size[PIPE_MAX_CONSTANT_BUFFERS]; struct translate *fetch; @@ -239,6 +235,8 @@ struct draw_context unsigned reduced_prim; + unsigned instance_id; + void *driver_private; }; @@ -252,9 +250,11 @@ void draw_vs_destroy( struct draw_context *draw ); void draw_vs_set_viewport( struct draw_context *, const struct pipe_viewport_state * ); -void draw_vs_set_constants( struct draw_context *, - const float (*constants)[4], - unsigned size ); +void +draw_vs_set_constants(struct draw_context *, + unsigned slot, + const void *constants, + unsigned size); @@ -262,15 +262,20 @@ void draw_vs_set_constants( struct draw_context *, * Geometry shading code: */ boolean draw_gs_init( struct draw_context *draw ); -void draw_gs_set_constants( struct draw_context *, - const float (*constants)[4], - unsigned size ); + +void +draw_gs_set_constants(struct draw_context *, + unsigned slot, + const void *constants, + unsigned size); + +void draw_gs_destroy( struct draw_context *draw ); /******************************************************************************* * Common shading code: */ -int draw_current_shader_outputs(struct draw_context *draw); -int draw_current_shader_position_output(struct draw_context *draw); +uint draw_current_shader_outputs(const struct draw_context *draw); +uint draw_current_shader_position_output(const struct draw_context *draw); /******************************************************************************* * Vertex processing (was passthrough) code: diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 2801dbafe47..f5ed32d0b05 100644 --- 
a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -33,7 +33,6 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" -#include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" #include "util/u_math.h" #include "util/u_prim.h" @@ -280,20 +279,33 @@ void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - unsigned reduced_prim = u_reduced_prim(prim); + draw_arrays_instanced(draw, prim, start, count, 0, 1); +} + +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + unsigned reduced_prim = u_reduced_prim(mode); + unsigned instance; + if (reduced_prim != draw->reduced_prim) { - draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); draw->reduced_prim = reduced_prim; } if (0) - draw_print_arrays(draw, prim, start, MIN2(count, 20)); + draw_print_arrays(draw, mode, start, MIN2(count, 20)); #if 0 { int i; - debug_printf("draw_arrays(prim=%u start=%u count=%u):\n", - prim, start, count); + debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", + mode, start, count); tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { @@ -311,6 +323,8 @@ draw_arrays(struct draw_context *draw, unsigned prim, } #endif - /* drawing done here: */ - draw_pt_arrays(draw, prim, start, count); + for (instance = 0; instance < instanceCount; instance++) { + draw->instance_id = instance + startInstance; + draw_pt_arrays(draw, mode, start, count); + } } diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 20edf7a227e..d5e0d92a605 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); struct pt_fetch; void 
draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vertex_input_count, - unsigned vertex_size ); + unsigned vertex_size, + unsigned instance_id_index ); void draw_pt_fetch_run( struct pt_fetch *fetch, const unsigned *elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 064e16c295c..4fb53276bbe 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -121,10 +121,12 @@ void draw_pt_emit_prepare( struct pt_emit *emit, emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; @@ -204,6 +206,7 @@ void draw_pt_emit( struct pt_emit *emit, translate->run( translate, 0, vertex_count, + draw->instance_id, hw_verts ); render->unmap_vertices( render, @@ -263,6 +266,7 @@ void draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, count, + draw->instance_id, hw_verts); if (0) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 305bfef4352..252be5053e4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -30,7 +30,6 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" #include "draw/draw_pt.h" #include "translate/translate.h" #include "translate/translate_cache.h" @@ -58,12 +57,14 @@ struct pt_fetch { */ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vs_input_count, - unsigned vertex_size ) + unsigned vertex_size, + unsigned instance_id_index ) { struct draw_context *draw = fetch->draw; unsigned nr_inputs; - unsigned i, nr = 0; + unsigned i, nr = 0, ei 
= 0; unsigned dst_offset = 0; + unsigned num_extra_inputs = 0; struct translate_key key; fetch->vertex_size = vertex_size; @@ -78,9 +79,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, { /* Need to set header->vertex_id = 0xffff somehow. */ + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; + key.element[nr].instance_divisor = 0; key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].output_offset = dst_offset; dst_offset += 1 * sizeof(float); @@ -91,19 +94,36 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, */ dst_offset += 4 * sizeof(float); } - - assert( draw->pt.nr_vertex_elements >= vs_input_count ); - nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements ); + if (instance_id_index != ~0) { + num_extra_inputs++; + } + + assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count); + + nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs); for (i = 0; i < nr_inputs; i++) { - key.element[nr].input_format = draw->pt.vertex_element[i].src_format; - key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; - key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; - key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - key.element[nr].output_offset = dst_offset; + if (i == instance_id_index) { + key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID; + key.element[nr].input_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_offset = dst_offset; + + dst_offset += sizeof(uint); + } else { + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; + key.element[nr].input_format = draw->pt.vertex_element[ei].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index; + key.element[nr].input_offset = 
draw->pt.vertex_element[ei].src_offset; + key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor; + key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[nr].output_offset = dst_offset; + + ei++; + dst_offset += 4 * sizeof(float); + } - dst_offset += 4 * sizeof(float); nr++; } @@ -158,6 +178,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, translate->run_elts( translate, elts, count, + draw->instance_id, verts ); } @@ -183,6 +204,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, translate->run( translate, start, count, + draw->instance_id, verts ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e7fe6b3b768..2a604470e9a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -166,9 +166,11 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, continue; } + key.element[i].type = TRANSLATE_ELEMENT_NORMAL; key.element[i].input_format = input_format; key.element[i].input_buffer = input_buffer; key.element[i].input_offset = input_offset; + key.element[i].instance_divisor = src->instance_divisor; key.element[i].output_format = output_format; key.element[i].output_offset = dst_offset; @@ -256,6 +258,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, feme->translate->run_elts( feme->translate, fetch_elts, fetch_count, + draw->instance_id, hw_verts ); if (0) { @@ -314,6 +317,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); if (0) { @@ -374,6 +378,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); diff --git 
a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 734c05f0688..c5dfbcfa3cb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -40,7 +40,6 @@ #include "draw/draw_pt.h" #include "draw/draw_vs.h" -#include "translate/translate.h" struct fetch_shade_emit; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 1a9df4cac5d..56b69354b21 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -33,7 +33,6 @@ #include "draw/draw_pt.h" #include "draw/draw_vs.h" #include "draw/draw_gs.h" -#include "translate/translate.h" struct fetch_pipeline_middle_end { @@ -59,6 +58,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vs.vertex_shader; + unsigned i; + unsigned instance_id_index = ~0; /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. @@ -66,6 +67,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned nr = MAX2( vs->info.num_inputs, vs->info.num_outputs + 1 ); + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + fpme->prim = prim; fpme->opt = opt; @@ -79,7 +89,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, draw_pt_fetch_prepare( fpme->fetch, vs->info.num_inputs, - fpme->vertex_size ); + fpme->vertex_size, + instance_id_index ); /* XXX: it's not really gl rasterization rules we care about here, * but gl vs dx9 clip spaces. 
*/ @@ -152,7 +163,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, vshader->run_linear(vshader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.vs_constants, + draw->pt.user.vs_constants, fetch_count, fpme->vertex_size, fpme->vertex_size); @@ -160,7 +171,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, draw_geometry_shader_run(gshader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.gs_constants, + draw->pt.user.gs_constants, fetch_count, fpme->vertex_size, fpme->vertex_size); @@ -237,7 +248,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.vs_constants, + draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); @@ -246,7 +257,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, draw_geometry_shader_run(geometry_shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.gs_constants, + draw->pt.user.gs_constants, count, fpme->vertex_size, fpme->vertex_size); @@ -317,7 +328,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.vs_constants, + draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); @@ -326,7 +337,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle draw_geometry_shader_run(geometry_shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.gs_constants, + draw->pt.user.gs_constants, count, fpme->vertex_size, 
fpme->vertex_size); diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 55151823a14..9728d5c2bdf 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -30,7 +30,6 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" #include "draw/draw_pt.h" struct pt_post_vs { diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index 17c3b8cec26..3236d38e6ab 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -33,6 +33,7 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" +#include "util/u_debug.h" void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) { diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 554f4ac3c18..8c3c7befbc7 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -39,7 +39,9 @@ #define DRAW_VERTEX_H +#include "pipe/p_compiler.h" #include "pipe/p_state.h" +#include "util/u_debug.h" /** diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 35536895326..6bdd612e6f4 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -43,29 +43,32 @@ #include "translate/translate.h" #include "translate/translate_cache.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" - -void draw_vs_set_constants( struct draw_context *draw, - const float (*constants)[4], - unsigned size ) +void +draw_vs_set_constants(struct draw_context *draw, + unsigned slot, + const void *constants, + unsigned size) { if (((uintptr_t)constants) & 0xf) { - if (size > draw->vs.const_storage_size) { - if (draw->vs.aligned_constant_storage) - align_free((void 
*)draw->vs.aligned_constant_storage); - draw->vs.aligned_constant_storage = align_malloc( size, 16 ); + if (size > draw->vs.const_storage_size[slot]) { + if (draw->vs.aligned_constant_storage[slot]) { + align_free((void *)draw->vs.aligned_constant_storage[slot]); + } + draw->vs.aligned_constant_storage[slot] = align_malloc(size, 16); } - memcpy( (void*)draw->vs.aligned_constant_storage, - constants, - size ); - constants = draw->vs.aligned_constant_storage; + memcpy((void *)draw->vs.aligned_constant_storage[slot], + constants, + size); + constants = draw->vs.aligned_constant_storage[slot]; } - - draw->vs.aligned_constants = constants; - draw_vs_aos_machine_constants( draw->vs.aos_machine, constants ); + + draw->vs.aligned_constants[slot] = constants; + draw_vs_aos_machine_constants(draw->vs.aos_machine, slot, constants); } @@ -83,6 +86,10 @@ draw_create_vertex_shader(struct draw_context *draw, { struct draw_vertex_shader *vs; + if (draw->dump_vs) { + tgsi_dump(shader->tokens, 0); + } + vs = draw_create_vs_llvm( draw, shader ); if (!vs) { vs = draw_create_vs_sse( draw, shader ); @@ -152,6 +159,8 @@ draw_delete_vertex_shader(struct draw_context *draw, boolean draw_vs_init( struct draw_context *draw ) { + draw->dump_vs = debug_get_bool_option("GALLIUM_DUMP_VS", FALSE); + draw->vs.machine = tgsi_exec_machine_create(); if (!draw->vs.machine) return FALSE; @@ -176,6 +185,8 @@ draw_vs_init( struct draw_context *draw ) void draw_vs_destroy( struct draw_context *draw ) { + uint i; + if (draw->vs.fetch_cache) translate_cache_destroy(draw->vs.fetch_cache); @@ -185,8 +196,11 @@ draw_vs_destroy( struct draw_context *draw ) if (draw->vs.aos_machine) draw_vs_aos_machine_destroy(draw->vs.aos_machine); - if (draw->vs.aligned_constant_storage) - align_free((void*)draw->vs.aligned_constant_storage); + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + if (draw->vs.aligned_constant_storage[i]) { + align_free((void *)draw->vs.aligned_constant_storage[i]); + } + } 
tgsi_exec_machine_destroy(draw->vs.machine); } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index e3b807ebd0e..d095c9bad1d 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -43,6 +43,7 @@ struct draw_varient_input enum pipe_format format; unsigned buffer; unsigned offset; + unsigned instance_divisor; }; struct draw_varient_output @@ -131,7 +132,7 @@ struct draw_vertex_shader { void (*run_linear)( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ); @@ -211,8 +212,10 @@ static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key struct aos_machine *draw_vs_aos_machine( void ); void draw_vs_aos_machine_destroy( struct aos_machine *machine ); -void draw_vs_aos_machine_constants( struct aos_machine *machine, - const float (*constants)[4] ); +void +draw_vs_aos_machine_constants(struct aos_machine *machine, + unsigned slot, + const void *constants); void draw_vs_aos_machine_viewport( struct aos_machine *machine, const struct pipe_viewport_state *viewport ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 1aaae4ab7a4..e7121f36541 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2114,11 +2114,14 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + unsigned i; if (0) debug_printf("%s %d\n", __FUNCTION__, count); machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - machine->constants = vaos->draw->vs.aligned_constants; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->constants[i] = 
vaos->draw->vs.aligned_constants[i]; + } machine->immediates = vaos->base.vs->immediates; machine->buffer = vaos->buffer; @@ -2135,12 +2138,15 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + unsigned i; if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count, vaos->base.key.const_vbuffers); machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - machine->constants = vaos->draw->vs.aligned_constants; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->constants[i] = vaos->draw->vs.aligned_constants[i]; + } machine->immediates = vaos->base.vs->immediates; machine->buffer = vaos->buffer; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 2cf72ddf7b1..1911242f825 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -122,7 +122,7 @@ struct aos_machine { ushort fpucntl; /* one of FPU_* above */ const float (*immediates)[4]; /* points to shader data */ - const float (*constants)[4]; /* points to draw data */ + const void *constants[PIPE_MAX_CONSTANT_BUFFERS]; /* points to draw data */ const struct aos_buffer *buffer; /* points to ? 
*/ }; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index 3240e3745dd..0eda414ee6a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -219,10 +219,12 @@ static void PIPE_CDECL populate_lut( struct aos_machine *machine, } -void draw_vs_aos_machine_constants( struct aos_machine *machine, - const float (*constants)[4] ) +void +draw_vs_aos_machine_constants(struct aos_machine *machine, + unsigned slot, + const void *constants) { - machine->constants = constants; + machine->constants[slot] = constants; { unsigned i; @@ -307,8 +309,10 @@ void draw_vs_aos_machine_viewport( struct aos_machine *machine, { } -void draw_vs_aos_machine_constants( struct aos_machine *machine, - const float (*constants)[4] ) +void +draw_vs_aos_machine_constants(struct aos_machine *machine, + unsigned slot, + const void *constants) { } diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 41cc8026131..7deca2b69d9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -85,7 +85,7 @@ static void vs_exec_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -95,7 +95,9 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, unsigned int i, j; unsigned slot; - machine->Consts = constants; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->Consts[i] = constants[i]; + } for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index b3535c0e48e..5f7a645f5d8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ 
b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -42,11 +42,8 @@ #ifdef MESA_LLVM -#include "gallivm/gallivm.h" - struct draw_llvm_vertex_shader { struct draw_vertex_shader base; - struct gallivm_prog *llvm_prog; struct tgsi_exec_machine *machine; }; @@ -58,23 +55,17 @@ vs_llvm_prepare( struct draw_vertex_shader *base, } - - static void vs_llvm_run_linear( struct draw_vertex_shader *base, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) { struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; - - gallivm_cpu_vs_exec(shader->llvm_prog, shader->machine, - input, base->info.num_inputs, output, base->info.num_outputs, - constants, count, input_stride, output_stride); } @@ -121,27 +112,6 @@ draw_create_vs_llvm(struct draw_context *draw, vs->base.delete = vs_llvm_delete; vs->machine = draw->vs.machine; - { - struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); - gallivm_ir_set_layout(ir, GALLIVM_SOA); - gallivm_ir_set_components(ir, 4); - gallivm_ir_fill_from_tgsi(ir, vs->base.state.tokens); - vs->llvm_prog = gallivm_ir_compile(ir); - gallivm_ir_delete(ir); - } - - draw->vs.engine = gallivm_global_cpu_engine(); - - /* XXX: Why are there two versions of this? Shouldn't creating the - * engine be a separate operation to compiling a shader? 
- */ - if (!draw->vs.engine) { - draw->vs.engine = gallivm_cpu_engine_create(vs->llvm_prog); - } - else { - gallivm_cpu_jit_compile(draw->vs.engine, vs->llvm_prog); - } - return &vs->base; } diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index ad184bd696d..d869eecec5e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -85,7 +85,7 @@ static void vs_ppc_run_linear( struct draw_vertex_shader *base, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* loop over verts */ for (i = 0; i < count; i += MAX_VERTICES) { const uint max_vertices = MIN2(MAX_VERTICES, count - i); - float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; - float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; - float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]; + PIPE_ALIGN_VAR(16) float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]; + PIPE_ALIGN_VAR(16) float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]; uint attr; /* convert (up to) four input verts to SoA format */ @@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, */ shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4]) shader->base.immediates, - (float (*)[4]) constants, + (const float (*)[4])constants[0], ppc_builtin_constants); /* convert (up to) four output verts from SoA back to AoS format */ diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 702051387ac..54e6423388f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -83,7 +83,7 @@ static void vs_sse_run_linear( struct draw_vertex_shader *base, const float 
(*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -112,7 +112,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base, /* run compiled shader */ shader->func(machine, - constants, + (const float (*)[4])constants[0], shader->base.immediates, input, base->info.num_inputs, diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index d16692584e5..5ed706cb4ff 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -38,7 +38,6 @@ #include "draw/draw_vertex.h" #include "draw/draw_vs.h" #include "translate/translate.h" -#include "translate/translate_cache.h" /* A first pass at incorporating vertex fetch/emit functionality into */ @@ -142,12 +141,13 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->fetch->run_elts( vsvg->fetch, elts, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); @@ -181,6 +181,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -203,12 +204,13 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->fetch->run( vsvg->fetch, start, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); @@ -239,6 +241,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->emit->run( 
vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -281,9 +284,11 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, fetch.nr_elements = key->nr_inputs; fetch.output_stride = vsvg->temp_vertex_stride; for (i = 0; i < key->nr_inputs; i++) { + fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL; fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; + fetch.element[i].instance_divisor = 0; fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; fetch.element[i].output_offset = i * 4 * sizeof(float); assert(fetch.element[i].output_offset < fetch.output_stride); @@ -295,17 +300,21 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, for (i = 0; i < key->nr_outputs; i++) { if (key->element[i].out.format != EMIT_1F_PSIZE) { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); + emit.element[i].instance_divisor = 0; emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); emit.element[i].output_offset = key->element[i].out.offset; assert(emit.element[i].input_offset <= fetch.output_stride); } else { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].input_buffer = 1; emit.element[i].input_offset = 0; + emit.element[i].instance_divisor = 0; emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].output_offset = key->element[i].out.offset; } diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp deleted file mode 100644 index f4af5cc8ad5..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ /dev/null @@ 
-1,332 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin [email protected] - */ -#ifdef MESA_LLVM - -#include "gallivm.h" -#include "gallivm_p.h" - -#include "instructions.h" -#include "loweringpass.h" -#include "storage.h" -#include "tgsitollvm.h" - -#include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" - -#include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_dump.h" - -#include <llvm/Module.h> -#include <llvm/CallingConv.h> -#include <llvm/Constants.h> -#include <llvm/DerivedTypes.h> -#include <llvm/Instructions.h> -#include <llvm/ModuleProvider.h> -#include <llvm/Pass.h> -#include <llvm/PassManager.h> -#include <llvm/Attributes.h> -#include <llvm/Support/PatternMatch.h> -#include <llvm/ExecutionEngine/JIT.h> -#include <llvm/ExecutionEngine/Interpreter.h> -#include <llvm/ExecutionEngine/GenericValue.h> -#include <llvm/Support/MemoryBuffer.h> -#include <llvm/LinkAllPasses.h> -#include <llvm/Analysis/Verifier.h> -#include <llvm/Analysis/LoopPass.h> -#include <llvm/Target/TargetData.h> -#include <llvm/Bitcode/ReaderWriter.h> -#include <llvm/Transforms/Utils/Cloning.h> - -#include <sstream> -#include <fstream> -#include <iostream> - -static int GLOBAL_ID = 0; - -using namespace llvm; - -static inline -void AddStandardCompilePasses(PassManager &PM) -{ - PM.add(new LoweringPass()); - PM.add(createVerifierPass()); // Verify that input is correct - - PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp - - //PM.add(createStripSymbolsPass(true)); - - PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst - PM.add(createCFGSimplificationPass()); // Clean up disgusting code - PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas - PM.add(createGlobalOptimizerPass()); // Optimize out global vars - PM.add(createGlobalDCEPass()); // Remove unused fns and globs - PM.add(createIPConstantPropagationPass());// IP Constant Propagation - PM.add(createDeadArgEliminationPass()); // 
Dead argument elimination - PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE - PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE - - PM.add(createPruneEHPass()); // Remove dead EH info - - PM.add(createFunctionInliningPass()); // Inline small functions - PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args - - PM.add(createTailDuplicationPass()); // Simplify cfg by copying code - PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl. - PM.add(createCFGSimplificationPass()); // Merge & remove BBs - PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas - PM.add(createInstructionCombiningPass()); // Combine silly seq's - PM.add(createCondPropagationPass()); // Propagate conditionals - - PM.add(createTailCallEliminationPass()); // Eliminate tail calls - PM.add(createCFGSimplificationPass()); // Merge & remove BBs - PM.add(createReassociatePass()); // Reassociate expressions - PM.add(createLoopRotatePass()); - PM.add(createLICMPass()); // Hoist loop invariants - PM.add(createLoopUnswitchPass()); // Unswitch loops. - PM.add(createLoopIndexSplitPass()); // Index split loops. - PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc - PM.add(createIndVarSimplifyPass()); // Canonicalize indvars - PM.add(createLoopUnrollPass()); // Unroll small loops - PM.add(createInstructionCombiningPass()); // Clean up after the unroller - PM.add(createGVNPass()); // Remove redundancies - PM.add(createSCCPPass()); // Constant prop with SCCP - - // Run instcombine after redundancy elimination to exploit opportunities - // opened up by them. 
- PM.add(createInstructionCombiningPass()); - PM.add(createCondPropagationPass()); // Propagate conditionals - - PM.add(createDeadStoreEliminationPass()); // Delete dead stores - PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE' - PM.add(createCFGSimplificationPass()); // Merge & remove BBs - PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations - PM.add(createDeadTypeEliminationPass()); // Eliminate dead types - PM.add(createConstantMergePass()); // Merge dup global constants -} - -void gallivm_prog_delete(struct gallivm_prog *prog) -{ - delete prog->module; - prog->module = 0; - prog->function = 0; - free(prog); -} - -static inline void -constant_interpolation(float (*inputs)[16][4], - const struct tgsi_interp_coef *coefs, - unsigned attrib, - unsigned chan) -{ - unsigned i; - - for (i = 0; i < QUAD_SIZE; ++i) { - inputs[i][attrib][chan] = coefs[attrib].a0[chan]; - } -} - -static inline void -linear_interpolation(float (*inputs)[16][4], - const struct tgsi_interp_coef *coefs, - unsigned attrib, - unsigned chan) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - const float x = inputs[i][0][0]; - const float y = inputs[i][0][1]; - - inputs[i][attrib][chan] = - coefs[attrib].a0[chan] + - coefs[attrib].dadx[chan] * x + - coefs[attrib].dady[chan] * y; - } -} - -static inline void -perspective_interpolation(float (*inputs)[16][4], - const struct tgsi_interp_coef *coefs, - unsigned attrib, - unsigned chan ) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - const float x = inputs[i][0][0]; - const float y = inputs[i][0][1]; - /* WPOS.w here is really 1/w */ - const float w = 1.0f / inputs[i][0][3]; - assert(inputs[i][0][3] != 0.0); - - inputs[i][attrib][chan] = - (coefs[attrib].a0[chan] + - coefs[attrib].dadx[chan] * x + - coefs[attrib].dady[chan] * y) * w; - } -} - -void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix) -{ - if (!ir || !ir->module) - return; - - if (file_prefix) { - std::ostringstream stream; - 
stream << file_prefix; - stream << ir->id; - stream << ".ll"; - std::string name = stream.str(); - std::ofstream out(name.c_str()); - if (!out) { - std::cerr<<"Can't open file : "<<stream.str()<<std::endl;; - return; - } - out << (*ir->module); - out.close(); - } else { - const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList(); - llvm::Module::FunctionListType::const_iterator itr; - std::cout<<"; ---------- Start shader "<<ir->id<<std::endl; - for (itr = funcs.begin(); itr != funcs.end(); ++itr) { - const llvm::Function &func = (*itr); - std::string name = func.getName(); - const llvm::Function *found = 0; - if (name.find("vs_shader") != std::string::npos || - name.find("fs_shader") != std::string::npos || - name.find("function") != std::string::npos) - found = &func; - if (found) { - std::cout<<*found<<std::endl; - } - } - std::cout<<"; ---------- End shader "<<ir->id<<std::endl; - } -} - - -void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, - float (*inputs)[16][4], - const struct tgsi_interp_coef *coef) -{ - for (int i = 0; i < prog->num_interp; ++i) { - const gallivm_interpolate &interp = prog->interpolators[i]; - switch (interp.type) { - case TGSI_INTERPOLATE_CONSTANT: - constant_interpolation(inputs, coef, interp.attrib, interp.chan); - break; - - case TGSI_INTERPOLATE_LINEAR: - linear_interpolation(inputs, coef, interp.attrib, interp.chan); - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - perspective_interpolation(inputs, coef, interp.attrib, interp.chan); - break; - - default: - assert( 0 ); - } - } -} - - -struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type) -{ - struct gallivm_ir *ir = - (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir)); - ++GLOBAL_ID; - ir->id = GLOBAL_ID; - ir->type = type; - - return ir; -} - -void gallivm_ir_set_layout(struct gallivm_ir *ir, - enum gallivm_vector_layout layout) -{ - ir->layout = layout; -} - -void gallivm_ir_set_components(struct gallivm_ir *ir, int num) -{ - 
ir->num_components = num; -} - -void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, - const struct tgsi_token *tokens) -{ - std::cout << "Creating llvm from: " <<std::endl; - tgsi_dump(tokens, 0); - - llvm::Module *mod = tgsi_to_llvmir(ir, tokens); - ir->module = mod; - gallivm_ir_dump(ir, 0); -} - -void gallivm_ir_delete(struct gallivm_ir *ir) -{ - delete ir->module; - free(ir); -} - -struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) -{ - struct gallivm_prog *prog = - (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); - - std::cout << "Before optimizations:"<<std::endl; - ir->module->dump(); - std::cout<<"-------------------------------"<<std::endl; - - PassManager veri; - veri.add(createVerifierPass()); - veri.run(*ir->module); - llvm::Module *mod = llvm::CloneModule(ir->module); - prog->num_consts = ir->num_consts; - memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); - prog->num_interp = ir->num_interp; - - /* Run optimization passes over it */ - PassManager passes; - passes.add(new TargetData(mod)); - AddStandardCompilePasses(passes); - passes.run(*mod); - prog->module = mod; - - std::cout << "After optimizations:"<<std::endl; - mod->dump(); - - return prog; -} - -#endif /* MESA_LLVM */ diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h deleted file mode 100644 index 36a64a77471..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm.h +++ /dev/null @@ -1,118 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin [email protected] - */ - -#ifndef GALLIVM_H -#define GALLIVM_H - -/* - LLVM representation consists of two stages - layout independent - intermediate representation gallivm_ir and driver specific - gallivm_prog. TGSI is first being translated into gallivm_ir - after that driver can set number of options on gallivm_ir and - have it compiled into gallivm_prog. 
gallivm_prog can be either - executed (assuming there's LLVM JIT backend for the current - target) or machine code generation can be done (assuming there's - a LLVM code generator for thecurrent target) - */ -#if defined __cplusplus -extern "C" { -#endif - -#include "pipe/p_state.h" - -#ifdef MESA_LLVM - -struct tgsi_token; - -struct gallivm_ir; -struct gallivm_prog; -struct gallivm_cpu_engine; -struct tgsi_interp_coef; -struct tgsi_sampler; -struct tgsi_exec_vector; - -enum gallivm_shader_type { - GALLIVM_VS, - GALLIVM_FS -}; - -enum gallivm_vector_layout { - GALLIVM_AOS, - GALLIVM_SOA -}; - -struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type); -void gallivm_ir_set_layout(struct gallivm_ir *ir, - enum gallivm_vector_layout layout); -void gallivm_ir_set_components(struct gallivm_ir *ir, int num); -void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, - const struct tgsi_token *tokens); -void gallivm_ir_delete(struct gallivm_ir *ir); - - -struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir); - -void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, - float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], - const struct tgsi_interp_coef *coefs); -void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); - - -struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); -struct gallivm_cpu_engine *gallivm_global_cpu_engine(); -int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_machine *machine, - const float (*input)[4], - unsigned num_inputs, - float (*output)[4], - unsigned num_outputs, - const float (*constants)[4], - unsigned count, - unsigned input_stride, - unsigned output_stride); -int gallivm_cpu_fs_exec(struct gallivm_prog *prog, - float x, float y, - float (*dests)[PIPE_MAX_SHADER_INPUTS][4], - float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], - float (*consts)[4], - struct tgsi_sampler *samplers); -void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog); 
-void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); - - -#endif /* MESA_LLVM */ - -#if defined __cplusplus -} -#endif - -#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp deleted file mode 100644 index 634bac01507..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ /dev/null @@ -1,140 +0,0 @@ -static const unsigned char llvm_builtins_data[] = { -0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x27,0x02,0x00,0x00,0x01,0x10,0x00,0x00, -0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39, -0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02, -0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24, -0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64, -0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x03,0x00,0x00,0x00,0x0b,0x84,0xff,0xff, -0xff,0xff,0x1f,0xc0,0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00, -0x32,0x22,0x48,0x09,0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45, -0xc6,0x05,0x42,0x52,0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83, -0x29,0x80,0x21,0x00,0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47, -0x80,0x50,0x2b,0x03,0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40, -0x14,0x01,0x80,0x11,0x80,0x22,0x88,0x00,0x13,0x30,0x7c,0xc0,0x03,0x3b,0xf8,0x05, -0x3b,0xa0,0x83,0x36,0xa8,0x07,0x77,0x58,0x07,0x77,0x78,0x87,0x7b,0x70,0x87,0x36, -0x60,0x87,0x74,0x70,0x87,0x7a,0xc0,0x87,0x36,0x38,0x07,0x77,0xa8,0x87,0x0d,0xf7, -0x50,0x0e,0x6d,0x00,0x0f,0x7a,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, -0x07,0x74,0xd0,0x06,0xe9,0x10,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e, -0x78,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06,0xe9,0x10,0x07,0x76,0xa0,0x07,0x71, -0x60,0x07,0x7a,0x10,0x07,0x76,0xd0,0x06,0xe9,0x30,0x07,0x72,0xa0,0x07,0x73,0x20, 
-0x07,0x7a,0x30,0x07,0x72,0xd0,0x06,0xe9,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07, -0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x7a, -0x30,0x07,0x72,0xd0,0x06,0xe6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, -0x07,0x74,0xd0,0x06,0xf6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60,0x07, -0x74,0xd0,0x06,0xf6,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a, -0x10,0x07,0x72,0x80,0x07,0x6d,0x10,0x0e,0x70,0xa0,0x07,0x70,0xa0,0x07,0x76,0x40, -0x07,0x6d,0x60,0x0e,0x78,0x00,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07, -0x72,0x80,0x07,0x3a,0x0f,0x84,0x48,0x20,0x23,0x24,0x40,0x00,0x62,0x67,0x88,0x9f, -0x19,0x92,0x24,0x00,0x10,0x04,0x00,0x00,0x00,0x43,0x92,0x04,0x08,0x00,0x00,0x00, -0x00,0x60,0x48,0xa2,0x00,0x40,0x10,0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x02, -0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x41,0x00,0x00,0x00,0x30,0x24,0x61,0x80,0x00, -0x00,0x00,0x00,0x00,0x86,0x24,0x07,0x10,0x00,0x00,0x00,0x00,0xc0,0x90,0x44,0x01, -0x80,0x20,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, -0x05,0x00,0x82,0x00,0x00,0x00,0x60,0x48,0x52,0x00,0x40,0x10,0x00,0x00,0x00,0x64, -0x81,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, -0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02, -0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66, -0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95, -0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00, -0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60, -0x04,0x00,0x00,0x00,0xc3,0x0d,0xce,0x43,0x4c,0x37,0x3c,0x8e,0x34,0xdc,0x30,0x41, -0xc2,0x74,0x03,0x34,0x51,0xc3,0x0d,0x4d,0x44,0x4c,0x37,0x44,0x8d,0x35,0x56,0x01, -0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0xd6,0x10,0x00,0xe6, 
-0x10,0x04,0x76,0x81,0x00,0x3e,0x30,0x0c,0x91,0x4f,0x1b,0x05,0x21,0x30,0x8f,0x6d, -0x13,0x48,0xe0,0x03,0xc3,0x10,0xf9,0xb4,0x55,0x20,0x81,0x0f,0x0c,0x43,0xe4,0xd7, -0x66,0x41,0x08,0xcc,0xa3,0x1f,0x40,0x41,0x34,0x53,0x84,0x99,0xc4,0x20,0x30,0x8f, -0x61,0x10,0x02,0xb0,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x27,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, -0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84, -0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00, -0x63,0x08,0x0d,0x34,0xdc,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, -0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19, -0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x27,0x50,0x20,0x05, -0xd1,0x0c,0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d, -0x14,0x13,0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4, -0xba,0x35,0x0c,0x13,0xf3,0xd8,0x05,0x31,0x31,0x8f,0x6e,0x1c,0x84,0x00,0x2c,0xcb, -0x01,0x14,0x44,0x33,0x45,0x98,0x61,0x0c,0x02,0xf3,0x00,0x00,0x00,0x00,0x00,0x00, -0x61,0x20,0x00,0x00,0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91, -0x11,0x00,0x00,0x00,0x63,0x08,0x4d,0x64,0x16,0xc1,0xe1,0x86,0xab,0x22,0x66,0x19, -0x02,0x01,0x1b,0x43,0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81, -0x0a,0x20,0x0b,0x34,0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x70, -0x83,0x57,0x11,0xb3,0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0, -0x48,0xb3,0x04,0xc6,0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48, -0x63,0x08,0xcd,0x64,0x64,0x40,0x70,0xb8,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc, -0x60,0x0c,0xc1,0x99,0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80, -0x33,0x38,0xd0,0x00,0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80, 
-0xe0,0x70,0x03,0x1b,0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12, -0x30,0x63,0x08,0x0f,0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7, -0xa0,0x06,0x70,0x00,0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x76,0x52,0x4c,0xcc,0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46, -0xc6,0x50,0x8a,0x89,0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89, -0x79,0x68,0x73,0x20,0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62, -0x9e,0xdb,0x32,0x88,0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98, -0xa7,0xb7,0x95,0x62,0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a, -0x34,0x35,0x56,0x62,0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25, -0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7, -0x24,0x01,0x63,0xec,0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9, -0xfc,0xc4,0xd0,0x90,0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6, -0xc1,0x71,0x7b,0x29,0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01, -0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08, -0x4e,0x33,0x58,0x47,0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e, -0x33,0xe1,0xbc,0xa5,0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e, -0x3a,0x40,0xc6,0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee, -0x6f,0x20,0x11,0x9c,0x66,0xf0,0x05,0xa7,0x99,0xec,0x82,0x10,0x9c,0xa6,0x32,0x93, -0x42,0x60,0x1e,0x7b,0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a, -0xa7,0x6d,0xa4,0x98,0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10, -0x9c,0x66,0xc0,0x7b,0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06, -0x8b,0xe0,0x34,0x83,0x2f,0x38,0xcd,0x64,0xd3,0x07,0x50,0x10,0xcd,0x14,0x61,0xe6, -0x61,0x08,0x4e,0x53,0xd5,0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00, 
-0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10, -0x1e,0xe1,0x19,0xc6,0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63, -0x15,0xc1,0x31,0x84,0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1, -0x71,0x6c,0x23,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48, -0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58, -0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7, -0x30,0x06,0x64,0xe0,0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82, -0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00, -0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb, -0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6, -0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5, -0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18, -0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f, -0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59, -0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8, -0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86, -0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72, -0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33, -0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3, -0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x26,0x41,0x08,0xc0, -0xb2,0x18,0x45,0x21,0x00,0xcb,0xb2,0x5b,0x04,0x31,0x31,0x8f,0x6d,0x13,0xc4,0xc4, -0x3c,0xb9,0x35,0x0c,0x15,0xb0,0x58,0x05,0x31,0x31,0x4f,0x7f,0x00,0x05,0xd1,0x4c, 
-0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00, -0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04, -0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca, -0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c, -0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x76,0x51,0x4c,0xcc, -0x53,0xdb,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31, -0x31,0x8f,0x6e,0x16,0xc4,0xc4,0x3c,0xbd,0x51,0x10,0x02,0xb0,0x2c,0xd6,0x30,0x54, -0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x2c,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34, -0xdc,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xdc,0x50,0x49,0xc4, -0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xdc,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60, -0x63,0x08,0x8d,0x33,0xdc,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00, -0x17,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb, -0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x76, -0x41,0x4c,0xcc,0xb3,0x1f,0x81,0x11,0x11,0x13,0x15,0x35,0x37,0x90,0x2c,0x4e,0xf4, -0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44, -0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0, -0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03, -0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82, -0x23,0x19,0xc3,0xa0,0x20,0x8b,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3, -0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, -0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, -0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84, 
-0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp deleted file mode 100644 index 1bd00a0c2a6..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ /dev/null @@ -1,243 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin [email protected] - */ -#ifdef MESA_LLVM - -#include "gallivm.h" -#include "gallivm_p.h" - -#include "instructions.h" -#include "loweringpass.h" -#include "storage.h" -#include "tgsitollvm.h" - -#include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" - -#include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_dump.h" - -#include "util/u_memory.h" -#include "util/u_math.h" - -#include <llvm/Module.h> -#include <llvm/CallingConv.h> -#include <llvm/Constants.h> -#include <llvm/DerivedTypes.h> -#include <llvm/Instructions.h> -#include <llvm/ModuleProvider.h> -#include <llvm/Pass.h> -#include <llvm/PassManager.h> -#include <llvm/Attributes.h> -#include <llvm/Support/PatternMatch.h> -#include <llvm/ExecutionEngine/JIT.h> -#include <llvm/ExecutionEngine/Interpreter.h> -#include <llvm/ExecutionEngine/GenericValue.h> -#include <llvm/Support/MemoryBuffer.h> -#include <llvm/LinkAllPasses.h> -#include <llvm/Analysis/Verifier.h> -#include <llvm/Analysis/LoopPass.h> -#include <llvm/Target/TargetData.h> -#include <llvm/Bitcode/ReaderWriter.h> -#include <llvm/Transforms/Utils/Cloning.h> - -#include <sstream> -#include <fstream> -#include <iostream> - -struct gallivm_cpu_engine { - llvm::ExecutionEngine *engine; -}; - -static struct gallivm_cpu_engine *CPU = 0; - -typedef int (*fragment_shader_runner)(float x, float y, - float (*dests)[16][4], - float (*inputs)[16][4], - int num_attribs, - float (*consts)[4], int num_consts, - struct tgsi_sampler *samplers); - -int gallivm_cpu_fs_exec(struct gallivm_prog *prog, - float fx, float fy, - float (*dests)[16][4], - float (*inputs)[16][4], - float (*consts)[4], - struct tgsi_sampler *samplers) -{ - fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function); - assert(runner); - - return runner(fx, fy, dests, inputs, prog->num_interp, - consts, prog->num_consts, - samplers); -} - -static inline 
llvm::Function *func_for_shader(struct gallivm_prog *prog) -{ - llvm::Module *mod = prog->module; - llvm::Function *func = 0; - - switch (prog->type) { - case GALLIVM_VS: - func = mod->getFunction("vs_shader"); - break; - case GALLIVM_FS: - func = mod->getFunction("fs_shader"); - break; - default: - assert(!"Unknown shader type!"); - break; - } - return func; -} - -/*! - This function creates a CPU based execution engine for the given gallivm_prog. - gallivm_cpu_engine should be used as a singleton throughout the library. Before - executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile. - The gallivm_prog instance which is being passed to the constructor is being - automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile - with it again. - */ -struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog) -{ - struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *) - calloc(1, sizeof(struct gallivm_cpu_engine)); - llvm::Module *mod = static_cast<llvm::Module*>(prog->module); - llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); - llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false); - ee->DisableLazyCompilation(); - cpu->engine = ee; - - llvm::Function *func = func_for_shader(prog); - - prog->function = ee->getPointerToFunction(func); - CPU = cpu; - return cpu; -} - - -/*! - This function JIT compiles the given gallivm_prog with the given cpu based execution engine. - The reference to the generated machine code entry point will be stored - in the gallivm_prog program. After executing this function one can call gallivm_prog_exec - in order to execute the gallivm_prog on the CPU. 
- */ -void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog) -{ - llvm::Module *mod = static_cast<llvm::Module*>(prog->module); - llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); - llvm::ExecutionEngine *ee = cpu->engine; - assert(ee); - /*FIXME : why was this disabled ? we need it for pow/sqrt/... */ - ee->DisableLazyCompilation(false); - ee->addModuleProvider(mp); - - llvm::Function *func = func_for_shader(prog); - prog->function = ee->getPointerToFunction(func); -} - -void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu) -{ - free(cpu); -} - -struct gallivm_cpu_engine * gallivm_global_cpu_engine() -{ - return CPU; -} - - -typedef void (*vertex_shader_runner)(void *ainputs, - void *dests, - float (*aconsts)[4]); - -#define MAX_TGSI_VERTICES 4 -/*! - This function is used to execute the gallivm_prog in software. Before calling - this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile - function. - */ -int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_machine *machine, - const float (*input)[4], - unsigned num_inputs, - float (*output)[4], - unsigned num_outputs, - const float (*constants)[4], - unsigned count, - unsigned input_stride, - unsigned output_stride ) -{ - unsigned int i, j; - unsigned slot; - vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function); - assert(runner); - - for (i = 0; i < count; i += MAX_TGSI_VERTICES) { - unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); - - /* Swizzle inputs. 
- */ - for (j = 0; j < max_vertices; j++) { - for (slot = 0; slot < num_inputs; slot++) { - machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; - machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; - machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; - machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; - } - - input = (const float (*)[4])((const char *)input + input_stride); - } - - /* run shader */ - runner(machine->Inputs, - machine->Outputs, - (float (*)[4]) constants); - - /* Unswizzle all output results - */ - for (j = 0; j < max_vertices; j++) { - for (slot = 0; slot < num_outputs; slot++) { - output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - output = (float (*)[4])((char *)output + output_stride); - } - } - - return 0; -} - -#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h deleted file mode 100644 index d2c5852bdf7..00000000000 --- a/src/gallium/auxiliary/gallivm/gallivm_p.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef GALLIVM_P_H -#define GALLIVM_P_H - -#ifdef MESA_LLVM - -#include "gallivm.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_compiler.h" - -namespace llvm { - class Module; -} - -#if defined __cplusplus -extern "C" { -#endif - -enum gallivm_shader_type; -enum gallivm_vector_layout; - -struct gallivm_interpolate { - int attrib; - int chan; - int type; -}; - -struct gallivm_ir { - llvm::Module *module; - int id; - enum gallivm_shader_type type; - enum gallivm_vector_layout layout; - int num_components; - int num_consts; - - /* FIXME: this might not be enough for some shaders */ - struct gallivm_interpolate interpolators[32*4]; - int num_interp; -}; - -struct gallivm_prog { - llvm::Module *module; - void *function; - - int id; - enum gallivm_shader_type type; - - int num_consts; - - /* FIXME: this might 
not be enough for some shaders */ - struct gallivm_interpolate interpolators[32*4]; - int num_interp; -}; - -static INLINE void gallivm_swizzle_components(int swizzle, - int *xc, int *yc, - int *zc, int *wc) -{ - int x = swizzle / 1000; swizzle -= x * 1000; - int y = swizzle / 100; swizzle -= y * 100; - int z = swizzle / 10; swizzle -= z * 10; - int w = swizzle; - - if (xc) *xc = x; - if (yc) *yc = y; - if (zc) *zc = z; - if (wc) *wc = w; -} - -static INLINE boolean gallivm_is_swizzle(int swizzle) -{ - const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 + - TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W; - return swizzle != NO_SWIZZLE; -} - -static INLINE int gallivm_x_swizzle(int swizzle) -{ - int x; - gallivm_swizzle_components(swizzle, &x, 0, 0, 0); - return x; -} - -static INLINE int gallivm_y_swizzle(int swizzle) -{ - int y; - gallivm_swizzle_components(swizzle, 0, &y, 0, 0); - return y; -} - -static INLINE int gallivm_z_swizzle(int swizzle) -{ - int z; - gallivm_swizzle_components(swizzle, 0, 0, &z, 0); - return z; -} - -static INLINE int gallivm_w_swizzle(int swizzle) -{ - int w; - gallivm_swizzle_components(swizzle, 0, 0, 0, &w); - return w; -} - -#if defined __cplusplus -} -#endif - -#endif /* MESA_LLVM */ - -#endif diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp deleted file mode 100644 index ee8162efce5..00000000000 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ /dev/null @@ -1,1193 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin [email protected] - */ -#ifdef MESA_LLVM - -#include "instructions.h" - -#include "storage.h" - -#include "util/u_memory.h" - -#include <llvm/CallingConv.h> -#include <llvm/Constants.h> -#include <llvm/DerivedTypes.h> -#include <llvm/Function.h> -#include <llvm/InstrTypes.h> -#include <llvm/Instructions.h> -#include <llvm/Attributes.h> -#include <llvm/Support/MemoryBuffer.h> -#include <llvm/Bitcode/ReaderWriter.h> - -#include <sstream> -#include <fstream> -#include <iostream> - -using namespace llvm; - -#include "gallivm_builtins.cpp" - -#if 0 -llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - ArrayType *vectorArray = ArrayType::get(vectorType, 4); -} -#endif - -static inline std::string createFuncName(int label) -{ - std::ostringstream stream; - stream << "function"; - stream << label; - return stream.str(); -} - -Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, - Storage *storage) - : m_mod(mod), m_func(func), m_builder(block), m_idx(0), - m_storage(storage) -{ - m_floatVecType = VectorType::get(Type::FloatTy, 4); - - m_llvmFSqrt = 0; - m_llvmFAbs = 0; - m_llvmPow = 0; - m_llvmFloor = 0; - m_llvmFlog = 0; - m_llvmFexp = 0; - m_llvmLit = 0; - m_fmtPtr = 0; - - MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( - (const char*)&llvm_builtins_data[0], - (const char*)&llvm_builtins_data[Elements(llvm_builtins_data)-1]); - m_mod = ParseBitcodeFile(buffer); -} - -llvm::BasicBlock * Instructions::currentBlock() const -{ - return m_builder.GetInsertBlock(); -} - -llvm::Value * Instructions::abs(llvm::Value *in) -{ - std::vector<llvm::Value*> vec = extractVector(in); - Value *xabs = callFAbs(vec[0]); - Value *yabs = callFAbs(vec[1]); - Value *zabs = callFAbs(vec[2]); - Value *wabs = callFAbs(vec[3]); - return vectorFromVals(xabs, yabs, 
zabs, wabs); -} - -llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) -{ - return m_builder.CreateAdd(in1, in2, name("add")); -} - -llvm::Value * Instructions::arl(llvm::Value *in) -{ - return floor(in); -} - -void Instructions::beginLoop() -{ - BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); - BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); - - m_builder.CreateBr(begin); - Loop loop; - loop.begin = begin; - loop.end = end; - m_builder.SetInsertPoint(begin); - m_loopStack.push(loop); -} - -void Instructions::bgnSub(unsigned label) -{ - llvm::Function *func = findFunction(label); - - Function::arg_iterator args = func->arg_begin(); - Value *ptr_INPUT = args++; - ptr_INPUT->setName("INPUT"); - m_storage->pushArguments(ptr_INPUT); - - llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); - - m_func = func; - m_builder.SetInsertPoint(entry); -} - -void Instructions::brk() -{ - assert(!m_loopStack.empty()); - BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); - m_builder.CreateBr(m_loopStack.top().end); - m_builder.SetInsertPoint(unr); -} - -void Instructions::cal(int label, llvm::Value *input) -{ - std::vector<Value*> params; - params.push_back(input); - llvm::Function *func = findFunction(label); - - m_builder.CreateCall(func, params.begin(), params.end()); -} - -llvm::Value * Instructions::ceil(llvm::Value *in) -{ - std::vector<llvm::Value*> vec = extractVector(in); - return vectorFromVals(callCeil(vec[0]), callCeil(vec[1]), - callCeil(vec[2]), callCeil(vec[3])); -} - -llvm::Value * Instructions::clamp(llvm::Value *in1) -{ - llvm::Value *zero = constVector(0.0f, 0.0f, 0.0f, 0.0f); - llvm::Value *one = constVector(1.0f, 1.0f, 1.0f, 1.0f); - return min( max(zero, in1), one); -} - -llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - llvm::Function *func = m_mod->getFunction("cmp"); - assert(func); - - std::vector<Value*> params; - params.push_back(in1); - 
params.push_back(in2); - params.push_back(in3); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - std::vector<llvm::Value*> vec3 = extractVector(in3); - Constant *half = ConstantFP::get(APFloat(0.5f)); - - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], half, name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], - name("selx")); - - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], half, name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], half, name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], half, name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -llvm::Value * Instructions::cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - std::vector<llvm::Value*> vec3 = extractVector(in3); - Constant *zero = Constant::getNullValue(Type::FloatTy); - - Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], zero, name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], - name("selx")); - - Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], zero, name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], zero, name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], zero, 
name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -llvm::Value * Instructions::cos(llvm::Value *in) -{ -#if 0 - llvm::Function *func = m_mod->getFunction("vcos"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("cosres")); - call->setTailCall(false); - return call; -#else - std::vector<llvm::Value*> elems = extractVector(in); - Function *func = m_mod->getFunction("cosf"); - assert(func); - CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); - cos->setCallingConv(CallingConv::C); - cos->setTailCall(true); - return vectorFromVals(cos, cos, cos, cos); -#endif -} - -llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) -{ - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z1")); - - Value *x2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(0), - name("x2")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *z2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(2), - name("z2")); - Value *y1z2 = mul(y1, z2); - Value *z1y2 = mul(z1, y2); - - Value *z1x2 = mul(z1, x2); - Value *x1z2 = mul(x1, z2); - - Value *x1y2 = mul(x1, y2); - Value *y1x2 = mul(y1, x2); - - return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); -} - -llvm::Value * Instructions::ddx(llvm::Value *in) -{ - // FIXME - assert(0); -} - -llvm::Value * Instructions::ddy(llvm::Value *in) -{ - // FIXME - assert(0); -} - -llvm::Value * Instructions::div(llvm::Value *in1, llvm::Value *in2) -{ - return m_builder.CreateFDiv(in1, in2, name("div")); -} - -llvm::Value * Instructions::dot2add(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) 
-{ - Value *mulRes = mul(in1, in2); - Value *x = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(0), - name("extractx")); - Value *y = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(1), - name("extracty")); - Value *z = m_builder.CreateExtractElement(in3, - m_storage->constantInt(2), - name("extractz")); - Value *xy = m_builder.CreateAdd(x, y,name("xy")); - Value *dot2add = m_builder.CreateAdd(xy, z, name("dot2add")); - return vectorFromVals(dot2add, dot2add, dot2add, dot2add); -} - -llvm::Value * Instructions::dp2(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - Value *x = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(0), - name("extractx")); - Value *y = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(1), - name("extracty")); - Value *xy = m_builder.CreateAdd(x, y,name("xy")); - return vectorFromVals(xy, xy, xy, xy); -} - -llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - Value *x = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(0), - name("extractx")); - Value *y = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(1), - name("extracty")); - Value *z = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(2), - name("extractz")); - Value *xy = m_builder.CreateAdd(x, y,name("xy")); - Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3")); - return vectorFromVals(dot3, dot3, dot3, dot3); -} - -llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - std::vector<llvm::Value*> vec = extractVector(mulRes); - Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy")); - Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz")); - Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4")); - return vectorFromVals(dot4, dot4, dot4, dot4); -} - -llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = 
mul(in1, in2); - std::vector<llvm::Value*> vec1 = extractVector(mulRes); - Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy")); - Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz")); - Value *dph = m_builder.CreateAdd(xyz, vec1[3], name("dph")); - return vectorFromVals(dph, dph, dph, dph); -} - -llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) -{ - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *w = m_builder.CreateExtractElement(in2, - m_storage->constantInt(3), - name("w")); - Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); - return vectorFromVals(ConstantFP::get(APFloat(1.f)), - ry, z, w); -} - -void Instructions::elseop() -{ - assert(!m_ifStack.empty()); - BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0); - m_builder.CreateBr(ifend); - m_builder.SetInsertPoint(m_ifStack.top()); - currentBlock()->setName(name("ifelse")); - m_ifStack.pop(); - m_ifStack.push(ifend); -} - -void Instructions::endif() -{ - assert(!m_ifStack.empty()); - m_builder.CreateBr(m_ifStack.top()); - m_builder.SetInsertPoint(m_ifStack.top()); - m_ifStack.pop(); -} - -void Instructions::endLoop() -{ - assert(!m_loopStack.empty()); - Loop loop = m_loopStack.top(); - m_builder.CreateBr(loop.begin); - loop.end->moveAfter(currentBlock()); - m_builder.SetInsertPoint(loop.end); - m_loopStack.pop(); -} - -void Instructions::end() -{ - m_builder.CreateRetVoid(); -} - -void Instructions::endSub() -{ - m_func = 0; - m_builder.SetInsertPoint(0); -} - -llvm::Value * Instructions::exp(llvm::Value *in) -{ - std::vector<llvm::Value*> vec = extractVector(in); - return vectorFromVals(callFExp(vec[0]), callFExp(vec[1]), - callFExp(vec[2]), callFExp(vec[3])); -} - -llvm::Value * Instructions::ex2(llvm::Value *in) -{ - 
llvm::Value *val = callPow(ConstantFP::get(APFloat(2.f)), - m_builder.CreateExtractElement( - in, m_storage->constantInt(0), - name("x1"))); - return vectorFromVals(val, val, val, val); -} - -llvm::Value * Instructions::floor(llvm::Value *in) -{ - std::vector<llvm::Value*> vec = extractVector(in); - return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]), - callFloor(vec[2]), callFloor(vec[3])); -} - -llvm::Value * Instructions::frc(llvm::Value *in) -{ - llvm::Value *flr = floor(in); - return sub(in, flr); -} - -void Instructions::ifop(llvm::Value *in) -{ - BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); - BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); - - //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); - //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); - //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); - - Constant *float0 = Constant::getNullValue(Type::FloatTy); - - Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), - name("extractx")); - Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); - m_builder.CreateCondBr(xcmp, ifthen, ifend); - //m_builder.SetInsertPoint(yblock); - - m_builder.SetInsertPoint(ifthen); - m_ifStack.push(ifend); -} - -llvm::Value * Instructions::kil(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("kil"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) -{ - llvm::Value *m = mul(in1, in2); - llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); - llvm::Value *s = sub(vec1, in1); - return add(m, mul(s, in3)); -} - -llvm::Value * Instructions::lg2(llvm::Value *in) -{ - std::vector<llvm::Value*> vec = extractVector(in); - llvm::Value *const_vec = constVector(1.442695f, 1.442695f, - 1.442695f, 1.442695f); - return 
mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), - callFLog(vec[2]), callFLog(vec[3])), const_vec); -} - -llvm::Value * Instructions::lit(llvm::Value *in) -{ - if (!m_llvmLit) { - m_llvmLit = m_mod->getFunction("lit"); - } - CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::log(llvm::Value *in) -{ - std::vector<llvm::Value*> vec = extractVector(in); - return vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), - callFLog(vec[2]), callFLog(vec[3])); -} - -llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) -{ - Value *mulRes = mul(in1, in2); - return add(mulRes, in3); -} - -llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) -{ - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], - name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], - name("selx")); - - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], - name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], - name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], - name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) -{ - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], - name("selx")); - - Value *ycmp = 
m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) -{ - return m_builder.CreateMul(in1, in2, name("mul")); -} - -llvm::Value * Instructions::neg(llvm::Value *in) -{ - Value *neg = m_builder.CreateNeg(in, name("neg")); - return neg; -} - -llvm::Value * Instructions::nrm(llvm::Value *in) -{ - llvm::Value *v = rsq(in); - return mul(v, in); -} - -llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) -{ - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *x2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(0), - name("x2")); - llvm::Value *val = callPow(x1, x2); - return vectorFromVals(val, val, val, val); -} - -llvm::Value * Instructions::rcp(llvm::Value *in1) -{ - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), - x1, name("rcp")); - return vectorFromVals(res, res, res, res); -} - -llvm::Value * Instructions::rsq(llvm::Value *in1) -{ - Value *x = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("extractx")); - Value *abs = callFAbs(x); - Value *sqrt = callFSqrt(abs); - - Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), - sqrt, - name("rsqrt")); - return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); -} - -llvm::Value * Instructions::scs(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("scs"); - assert(func); - - CallInst 
*call = m_builder.CreateCall(func, in, name("scsres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::seq(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOEQ(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOEQ(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOEQ(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOEQ(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::sfl(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - return vectorFromVals(const0f, const0f, const0f, const0f); -} - -llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, 
name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::sin(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("vsin"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("sinres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::sle(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOLE(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOLE(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, 
name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOLE(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOLE(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::sne(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpONE(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpONE(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpONE(vec1[2], vec2[2], name("zcmp")); - 
Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpONE(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::str(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); - - return vectorFromVals(const1f, const1f, const1f, const1f); -} - -llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) -{ - Value *res = m_builder.CreateSub(in1, in2, name("sub")); - return res; -} - -llvm::Value * Instructions::trunc(llvm::Value *in) -{ - std::vector<llvm::Value*> vec = extractVector(in); - Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), - name("ftoix")); - Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), - name("ftoiy")); - Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), - name("ftoiz")); - Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), - name("ftoiw")); - Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, - name("fx")); - Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, - name("fy")); - Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, - name("fz")); - Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, - name("fw")); - return vectorFromVals(fx, fy, fz, fw); -} - -llvm::Value * Instructions::x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - std::vector<llvm::Value*> vec1 = extractVector(in1); - std::vector<llvm::Value*> vec2 = extractVector(in2); - std::vector<llvm::Value*> vec3 = extractVector(in3); - - Value *x2x3 = m_builder.CreateMul( vec2[0], vec3[0], name("x2x3")); - Value *y2y3 = m_builder.CreateMul( vec2[1], vec3[1], name("y2y3")); - Value *x1px2x3 = m_builder.CreateAdd (vec1[0], x2x3, name("x1 + x2x3")); - Value *x1px2x3py2y3 = m_builder.CreateAdd (x1px2x3, y2y3, name("x1 + x2x3 + y2y3")); - - Value 
*x2z3 = m_builder.CreateMul( vec2[0], vec3[2], name("x2z3")); - Value *y2w3 = m_builder.CreateMul( vec2[1], vec3[3], name("y2w3")); - Value *y1px2z3 = m_builder.CreateAdd (vec1[1], x2z3, name("y1 + x2z3")); - Value *y1px2z3py2w3 = m_builder.CreateAdd (y1px2z3, y2w3, name("y1 + x2z3 + y2w3")); - - return vectorFromVals(x1px2x3py2y3, y1px2z3py2w3, x1px2x3py2y3, y1px2z3py2w3); -} - -void Instructions::printVector(llvm::Value *val) -{ - static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; - - if (!m_fmtPtr) { - Constant *format = ConstantArray::get(frmt, true); - ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); - GlobalVariable* globalFormat = new GlobalVariable( - /*Type=*/arrayTy, - /*isConstant=*/true, - /*Linkage=*/GlobalValue::InternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name(".str"), - m_mod); - globalFormat->setInitializer(format); - - Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); - std::vector<Constant*> const_ptr_21_indices; - const_ptr_21_indices.push_back(const_int0); - const_ptr_21_indices.push_back(const_int0); - m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, - &const_ptr_21_indices[0], const_ptr_21_indices.size()); - } - - Function *func_printf = m_mod->getFunction("printf"); - if (!func_printf) - func_printf = declarePrintf(); - assert(func_printf); - std::vector<llvm::Value*> vec = extractVector(val); - Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); - Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); - Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); - Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); - std::vector<Value*> params; - params.push_back(m_fmtPtr); - params.push_back(dx); - params.push_back(dy); - params.push_back(dz); - params.push_back(dw); - CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), - name("printf")); - 
call->setCallingConv(CallingConv::C); - call->setTailCall(true); -} - -const char * Instructions::name(const char *prefix) -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -llvm::Value * Instructions::callCeil(llvm::Value *val) -{ - if (!m_llvmCeil) { - // predeclare the intrinsic - std::vector<const Type*> ceilArgs; - ceilArgs.push_back(Type::FloatTy); - AttrListPtr ceilPal; - FunctionType* ceilType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/ceilArgs, - /*isVarArg=*/false); - m_llvmCeil = Function::Create( - /*Type=*/ceilType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"ceilf", m_mod); - m_llvmCeil->setCallingConv(CallingConv::C); - m_llvmCeil->setAttributes(ceilPal); - } - CallInst *call = m_builder.CreateCall(m_llvmCeil, val, - name("ceilf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value *Instructions::callFAbs(llvm::Value *val) -{ - if (!m_llvmFAbs) { - // predeclare the intrinsic - std::vector<const Type*> fabsArgs; - fabsArgs.push_back(Type::FloatTy); - AttrListPtr fabsPal; - FunctionType* fabsType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fabsArgs, - /*isVarArg=*/false); - m_llvmFAbs = Function::Create( - /*Type=*/fabsType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"fabs", m_mod); - m_llvmFAbs->setCallingConv(CallingConv::C); - m_llvmFAbs->setAttributes(fabsPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, - name("fabs")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::callFExp(llvm::Value *val) -{ - if (!m_llvmFexp) { - // predeclare the intrinsic - std::vector<const Type*> fexpArgs; - fexpArgs.push_back(Type::FloatTy); - AttrListPtr fexpPal; - FunctionType* fexpType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fexpArgs, - /*isVarArg=*/false); - m_llvmFexp = Function::Create( - /*Type=*/fexpType, - 
/*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"expf", m_mod); - m_llvmFexp->setCallingConv(CallingConv::C); - m_llvmFexp->setAttributes(fexpPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFexp, val, - name("expf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::callFLog(llvm::Value *val) -{ - if (!m_llvmFlog) { - // predeclare the intrinsic - std::vector<const Type*> flogArgs; - flogArgs.push_back(Type::FloatTy); - AttrListPtr flogPal; - FunctionType* flogType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/flogArgs, - /*isVarArg=*/false); - m_llvmFlog = Function::Create( - /*Type=*/flogType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"logf", m_mod); - m_llvmFlog->setCallingConv(CallingConv::C); - m_llvmFlog->setAttributes(flogPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFlog, val, - name("logf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::callFloor(llvm::Value *val) -{ - if (!m_llvmFloor) { - // predeclare the intrinsic - std::vector<const Type*> floorArgs; - floorArgs.push_back(Type::FloatTy); - AttrListPtr floorPal; - FunctionType* floorType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/floorArgs, - /*isVarArg=*/false); - m_llvmFloor = Function::Create( - /*Type=*/floorType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"floorf", m_mod); - m_llvmFloor->setCallingConv(CallingConv::C); - m_llvmFloor->setAttributes(floorPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFloor, val, - name("floorf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value *Instructions::callFSqrt(llvm::Value *val) -{ - if (!m_llvmFSqrt) { - // predeclare the intrinsic - std::vector<const Type*> fsqrtArgs; - fsqrtArgs.push_back(Type::FloatTy); - AttrListPtr fsqrtPal; - FunctionType* fsqrtType = FunctionType::get( - 
/*Result=*/Type::FloatTy, - /*Params=*/fsqrtArgs, - /*isVarArg=*/false); - m_llvmFSqrt = Function::Create( - /*Type=*/fsqrtType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.sqrt.f32", m_mod); - m_llvmFSqrt->setCallingConv(CallingConv::C); - m_llvmFSqrt->setAttributes(fsqrtPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, - name("sqrt")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) -{ - if (!m_llvmPow) { - // predeclare the intrinsic - std::vector<const Type*> powArgs; - powArgs.push_back(Type::FloatTy); - powArgs.push_back(Type::FloatTy); - AttrListPtr powPal; - FunctionType* powType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/powArgs, - /*isVarArg=*/false); - m_llvmPow = Function::Create( - /*Type=*/powType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.pow.f32", m_mod); - m_llvmPow->setCallingConv(CallingConv::C); - m_llvmPow->setAttributes(powPal); - } - std::vector<Value*> params; - params.push_back(val1); - params.push_back(val2); - CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), - name("pow")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w) -{ - Constant *const_vec = Constant::getNullValue(m_floatVecType); - Value *res = m_builder.CreateInsertElement(const_vec, x, - m_storage->constantInt(0), - name("vecx")); - res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), - name("vecxy")); - res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), - name("vecxyz")); - if (w) - res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), - name("vecxyzw")); - return res; -} - -llvm::Value * Instructions::constVector(float x, float y, float z, float w) -{ - 
std::vector<Constant*> vec(4); - vec[0] = ConstantFP::get(APFloat(x)); - vec[1] = ConstantFP::get(APFloat(y)); - vec[2] = ConstantFP::get(APFloat(z)); - vec[3] = ConstantFP::get(APFloat(w)); - return ConstantVector::get(m_floatVecType, vec); -} - -llvm::Function * Instructions::declarePrintf() -{ - std::vector<const Type*> args; - AttrListPtr params; - FunctionType* funcTy = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/args, - /*isVarArg=*/true); - Function* func_printf = Function::Create( - /*Type=*/funcTy, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"printf", m_mod); - func_printf->setCallingConv(CallingConv::C); - func_printf->setAttributes(params); - return func_printf; -} - -llvm::Function * Instructions::declareFunc(int label) -{ - PointerType *vecPtr = PointerType::getUnqual(m_floatVecType); - std::vector<const Type*> args; - args.push_back(vecPtr); - args.push_back(vecPtr); - args.push_back(vecPtr); - args.push_back(vecPtr); - AttrListPtr params; - FunctionType *funcType = FunctionType::get( - /*Result=*/Type::VoidTy, - /*Params=*/args, - /*isVarArg=*/false); - std::string name = createFuncName(label); - Function *func = Function::Create( - /*Type=*/funcType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/name.c_str(), m_mod); - func->setCallingConv(CallingConv::C); - func->setAttributes(params); - return func; -} - -llvm::Function * Instructions::findFunction(int label) -{ - llvm::Function *func = m_functions[label]; - if (!func) { - func = declareFunc(label); - m_functions[label] = func; - } - return func; -} - -std::vector<llvm::Value*> Instructions::extractVector(llvm::Value *vec) -{ - std::vector<llvm::Value*> elems(4); - elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0), - name("x")); - elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1), - name("y")); - elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2), - name("z")); - elems[3] = 
m_builder.CreateExtractElement(vec, m_storage->constantInt(3), - name("w")); - return elems; -} - - -#endif //MESA_LLVM - - diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h deleted file mode 100644 index e18571251ee..00000000000 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ /dev/null @@ -1,175 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin [email protected] - */ - -#ifndef INSTRUCTIONS_H -#define INSTRUCTIONS_H - -#include <llvm/BasicBlock.h> -#include <llvm/Module.h> -#include <llvm/Value.h> -#include <llvm/Support/IRBuilder.h> - -#include <map> -#include <stack> - -namespace llvm { - class VectorType; - class Function; -} - -class Storage; - -class Instructions -{ -public: - Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, - Storage *storage); - - llvm::BasicBlock *currentBlock() const; - - llvm::Value *abs(llvm::Value *in1); - llvm::Value *add(llvm::Value *in1, llvm::Value *in2); - llvm::Value *arl(llvm::Value *in1); - void beginLoop(); - void bgnSub(unsigned); - void brk(); - void cal(int label, llvm::Value *input); - llvm::Value *ceil(llvm::Value *in); - llvm::Value *clamp(llvm::Value *in); - llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *cos(llvm::Value *in); - llvm::Value *cross(llvm::Value *in1, llvm::Value *in2); - llvm::Value *ddx(llvm::Value *in); - llvm::Value *ddy(llvm::Value *in); - llvm::Value *div(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dot2add(llvm::Value *in, llvm::Value *in2, llvm::Value *in3); - llvm::Value *dp2(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dst(llvm::Value *in1, llvm::Value *in2); - void elseop(); - void endif(); - void endLoop(); - void end(); - void endSub(); - llvm::Value *exp(llvm::Value *in); - llvm::Value *ex2(llvm::Value *in); - llvm::Value *floor(llvm::Value *in); - llvm::Value *frc(llvm::Value *in); - void ifop(llvm::Value *in); - llvm::Value 
*kil(llvm::Value *in); - llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3); - llvm::Value *lg2(llvm::Value *in); - llvm::Value *lit(llvm::Value *in); - llvm::Value *log(llvm::Value *in); - llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3); - llvm::Value *max(llvm::Value *in1, llvm::Value *in2); - llvm::Value *min(llvm::Value *in1, llvm::Value *in2); - llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); - llvm::Value *neg(llvm::Value *in); - llvm::Value *nrm(llvm::Value *in); - llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); - llvm::Value *rcp(llvm::Value *in); - llvm::Value *rsq(llvm::Value *in); - llvm::Value *scs(llvm::Value *in); - llvm::Value *seq(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sfl(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sge(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sin(llvm::Value *in); - llvm::Value *sle(llvm::Value *in1, llvm::Value *in2); - llvm::Value *slt(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sne(llvm::Value *in1, llvm::Value *in2); - llvm::Value *str(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sub(llvm::Value *in1, llvm::Value *in2); - llvm::Value *trunc(llvm::Value *in); - llvm::Value *x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - - void printVector(llvm::Value *val); -private: - const char *name(const char *prefix); - - llvm::Value *callCeil(llvm::Value *val); - llvm::Value *callFAbs(llvm::Value *val); - llvm::Value *callFExp(llvm::Value *val); - llvm::Value *callFLog(llvm::Value *val); - llvm::Value *callFloor(llvm::Value *val); - llvm::Value *callFSqrt(llvm::Value *val); - llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2); - - llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w=0); - - llvm::Value *constVector(float x, float y, float z, float w); - - llvm::Function *declarePrintf(); - llvm::Function 
*declareFunc(int label); - - llvm::Function *findFunction(int label); - - std::vector<llvm::Value*> extractVector(llvm::Value *vec); -private: - llvm::Module *m_mod; - llvm::Function *m_func; - char m_name[32]; - llvm::IRBuilder<> m_builder; - int m_idx; - - llvm::VectorType *m_floatVecType; - - llvm::Function *m_llvmCeil; - llvm::Function *m_llvmFSqrt; - llvm::Function *m_llvmFAbs; - llvm::Function *m_llvmPow; - llvm::Function *m_llvmFloor; - llvm::Function *m_llvmFlog; - llvm::Function *m_llvmFexp; - llvm::Function *m_llvmLit; - - llvm::Constant *m_fmtPtr; - - std::stack<llvm::BasicBlock*> m_ifStack; - struct Loop { - llvm::BasicBlock *begin; - llvm::BasicBlock *end; - }; - std::stack<Loop> m_loopStack; - std::map<int, llvm::Function*> m_functions; - Storage *m_storage; -}; - -#endif diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp deleted file mode 100644 index 721b7d2d833..00000000000 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ /dev/null @@ -1,525 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include <cstdio> -#include "instructionssoa.h" - -#include "storagesoa.h" - -#include "pipe/p_shader_tokens.h" -#include "util/u_memory.h" - -#include <llvm/CallingConv.h> -#include <llvm/Constants.h> -#include <llvm/Module.h> -#include <llvm/Function.h> -#include <llvm/Instructions.h> -#include <llvm/Transforms/Utils/Cloning.h> -#include <llvm/Attributes.h> -#include <llvm/Support/MemoryBuffer.h> -#include <llvm/Bitcode/ReaderWriter.h> - - -#include <iostream> - - -/* disable some warnings. 
this file is autogenerated */ -#if defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wunused-variable" -#endif -using namespace llvm; -#include "gallivmsoabuiltins.cpp" -#if defined(__GNUC__) -#pragma GCC diagnostic warning "-Wunused-variable" -#endif - -InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, - llvm::BasicBlock *block, StorageSoa *storage) - : m_builder(block), - m_storage(storage), - m_idx(0) -{ - createFunctionMap(); - createBuiltins(); -} - -const char * InstructionsSoa::name(const char *prefix) const -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - Constant *constVector = Constant::getNullValue(vectorType); - Value *res = m_builder.CreateInsertElement(constVector, x, - m_storage->constantInt(0), - name("vecx")); - res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), - name("vecxy")); - res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), - name("vecxyz")); - if (w) - res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), - name("vecxyzw")); - return res; -} - -void InstructionsSoa::end() -{ - m_builder.CreateRetVoid(); -} - -std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) -{ - std::vector<llvm::Value*> res(4); - res[0] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(0), - name("extract1X")); - res[1] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(1), - name("extract2X")); - res[2] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(2), - name("extract3X")); - res[3] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(3), - name("extract4X")); - - return res; -} - -llvm::IRBuilder<>* InstructionsSoa::getIRBuilder() -{ - return &m_builder; -} - -void 
InstructionsSoa::createFunctionMap() -{ - m_functionsMap[TGSI_OPCODE_ABS] = "abs"; - m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; - m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; - m_functionsMap[TGSI_OPCODE_MIN] = "min"; - m_functionsMap[TGSI_OPCODE_MAX] = "max"; - m_functionsMap[TGSI_OPCODE_POW] = "pow"; - m_functionsMap[TGSI_OPCODE_LIT] = "lit"; - m_functionsMap[TGSI_OPCODE_RSQ] = "rsq"; - m_functionsMap[TGSI_OPCODE_SLT] = "slt"; -} - -void InstructionsSoa::createDependencies() -{ - { - std::vector<std::string> powDeps(2); - powDeps[0] = "powf"; - powDeps[1] = "powvec"; - m_builtinDependencies["pow"] = powDeps; - } - { - std::vector<std::string> absDeps(2); - absDeps[0] = "fabsf"; - absDeps[1] = "absvec"; - m_builtinDependencies["abs"] = absDeps; - } - { - std::vector<std::string> maxDeps(1); - maxDeps[0] = "maxvec"; - m_builtinDependencies["max"] = maxDeps; - } - { - std::vector<std::string> minDeps(1); - minDeps[0] = "minvec"; - m_builtinDependencies["min"] = minDeps; - } - { - std::vector<std::string> litDeps(4); - litDeps[0] = "minvec"; - litDeps[1] = "maxvec"; - litDeps[2] = "powf"; - litDeps[3] = "powvec"; - m_builtinDependencies["lit"] = litDeps; - } - { - std::vector<std::string> rsqDeps(4); - rsqDeps[0] = "sqrtf"; - rsqDeps[1] = "sqrtvec"; - rsqDeps[2] = "fabsf"; - rsqDeps[3] = "absvec"; - m_builtinDependencies["rsq"] = rsqDeps; - } -} - -llvm::Function * InstructionsSoa::function(int op) -{ - if (m_functions.find(op) != m_functions.end()) - return m_functions[op]; - - std::string name = m_functionsMap[op]; - - std::cout <<"For op = "<<op<<", func is '"<<name<<"'"<<std::endl; - - std::vector<std::string> deps = m_builtinDependencies[name]; - for (unsigned int i = 0; i < deps.size(); ++i) { - llvm::Function *func = m_builtins->getFunction(deps[i]); - std::cout <<"\tinjecting dep = '"<<func->getName()<<"'"<<std::endl; - injectFunction(func); - } - - llvm::Function *originalFunc = m_builtins->getFunction(name); - injectFunction(originalFunc, op); - return 
m_functions[op]; -} - -llvm::Module * InstructionsSoa::currentModule() const -{ - BasicBlock *block = m_builder.GetInsertBlock(); - if (!block || !block->getParent()) - return 0; - - return block->getParent()->getParent(); -} - -void InstructionsSoa::createBuiltins() -{ - std::string ErrMsg; - MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( - (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data) - 1]); - m_builtins = ParseBitcodeFile(buffer, &ErrMsg); - std::cout<<"Builtins created at "<<m_builtins<<" ("<<ErrMsg<<")"<<std::endl; - assert(m_builtins); - createDependencies(); -} - - -std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> in1) -{ - llvm::Function *func = function(TGSI_OPCODE_ABS); - return callBuiltin(func, in1); -} - -std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<llvm::Value*> res(4); - - res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); - res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); - res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); - res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); - - return res; -} - -std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in) -{ - std::vector<llvm::Value*> res(4); - - //Extract x's - llvm::Value *x1 = m_builder.CreateExtractElement(in[0], - m_storage->constantInt(0), - name("extractX")); - //cast it to an unsigned int - x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); - - res[0] = x1;//vectorFromVals(x1, x2, x3, x4); - //only x is valid. 
the others shouldn't be necessary - /* - res[1] = Constant::getNullValue(m_floatVecType); - res[2] = Constant::getNullValue(m_floatVecType); - res[3] = Constant::getNullValue(m_floatVecType); - */ - - return res; -} - -std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_DP3); - return callBuiltin(func, in1, in2); -} - -std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in) -{ - llvm::Function *func = function(TGSI_OPCODE_LIT); - return callBuiltin(func, in); -} - -std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2, - const std::vector<llvm::Value*> in3) -{ - std::vector<llvm::Value*> res = mul(in1, in2); - return add(res, in3); -} - -std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_MAX); - return callBuiltin(func, in1, in2); -} - -std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_MIN); - return callBuiltin(func, in1, in2); -} - -std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<llvm::Value*> res(4); - - res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); - res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); - res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); - res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); - - return res; -} - -std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_POW); - return callBuiltin(func, in1, in2); -} - -std::vector<llvm::Value*> 
InstructionsSoa::rsq(const std::vector<llvm::Value*> in) -{ - llvm::Function *func = function(TGSI_OPCODE_RSQ); - return callBuiltin(func, in); -} - -std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_SLT); - return callBuiltin(func, in1, in2); -} - -std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<llvm::Value*> res(4); - - res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); - res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); - res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); - res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); - - return res; -} - -void checkFunction(Function *func) -{ - for (Function::const_iterator BI = func->begin(), BE = func->end(); - BI != BE; ++BI) { - const BasicBlock &BB = *BI; - for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); - II != IE; ++II) { - const Instruction &I = *II; - std::cout<< "Instr = "<<I; - for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) { - const Value *Op = I.getOperand(op); - std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl; - //I->setOperand(op, V); - } - } - } -} - -llvm::Value * InstructionsSoa::allocaTemp() -{ - VectorType *vector = VectorType::get(Type::FloatTy, 4); - ArrayType *vecArray = ArrayType::get(vector, 4); - AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"), - m_builder.GetInsertBlock()); - - std::vector<Value*> indices; - indices.push_back(m_storage->constantInt(0)); - indices.push_back(m_storage->constantInt(0)); - GetElementPtrInst *getElem = GetElementPtrInst::Create(alloca, - indices.begin(), - indices.end(), - name("allocaPtr"), - m_builder.GetInsertBlock()); - return getElem; -} - -std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr) -{ - GetElementPtrInst *xElemPtr = 
GetElementPtrInst::Create(allocaPtr, - m_storage->constantInt(0), - name("xPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *yElemPtr = GetElementPtrInst::Create(allocaPtr, - m_storage->constantInt(1), - name("yPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *zElemPtr = GetElementPtrInst::Create(allocaPtr, - m_storage->constantInt(2), - name("zPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *wElemPtr = GetElementPtrInst::Create(allocaPtr, - m_storage->constantInt(3), - name("wPtr"), - m_builder.GetInsertBlock()); - - std::vector<llvm::Value*> res(4); - res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock()); - res[1] = new LoadInst(yElemPtr, name("yRes"), false, m_builder.GetInsertBlock()); - res[2] = new LoadInst(zElemPtr, name("zRes"), false, m_builder.GetInsertBlock()); - res[3] = new LoadInst(wElemPtr, name("wRes"), false, m_builder.GetInsertBlock()); - - return res; -} - -std::vector<llvm::Value*> InstructionsSoa::dp4(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_DP4); - return callBuiltin(func, in1, in2); -} - -std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1) -{ - std::vector<Value*> params; - - llvm::Value *allocaPtr = allocaTemp(); - params.push_back(allocaPtr); - params.push_back(in1[0]); - params.push_back(in1[1]); - params.push_back(in1[2]); - params.push_back(in1[3]); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - - return allocaToResult(allocaPtr); -} - -std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<Value*> params; - - llvm::Value *allocaPtr = allocaTemp(); - params.push_back(allocaPtr); - params.push_back(in1[0]); - params.push_back(in1[1]); - 
params.push_back(in1[2]); - params.push_back(in1[3]); - params.push_back(in2[0]); - params.push_back(in2[1]); - params.push_back(in2[2]); - params.push_back(in2[3]); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - - return allocaToResult(allocaPtr); -} - -std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2, - const std::vector<llvm::Value*> in3) -{ - std::vector<Value*> params; - - llvm::Value *allocaPtr = allocaTemp(); - params.push_back(allocaPtr); - params.push_back(in1[0]); - params.push_back(in1[1]); - params.push_back(in1[2]); - params.push_back(in1[3]); - params.push_back(in2[0]); - params.push_back(in2[1]); - params.push_back(in2[2]); - params.push_back(in2[3]); - params.push_back(in3[0]); - params.push_back(in3[1]); - params.push_back(in3[2]); - params.push_back(in3[3]); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - - return allocaToResult(allocaPtr); -} - -void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) -{ - assert(originalFunc); - std::cout << "injecting function originalFunc " <<originalFunc->getName() <<std::endl; - if (op != TGSI_OPCODE_LAST) { - /* in this case it's possible the function has been already - * injected as part of the dependency chain, which gets - * injected below */ - llvm::Function *func = currentModule()->getFunction(originalFunc->getName()); - if (func) { - m_functions[op] = func; - return; - } - } - llvm::Function *func = 0; - if (originalFunc->isDeclaration()) { - func = Function::Create(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage, - originalFunc->getName(), currentModule()); - func->setCallingConv(CallingConv::C); - const AttrListPtr pal; - func->setAttributes(pal); - currentModule()->dump(); - } 
else { - DenseMap<const Value*, Value *> val; - val[m_builtins->getFunction("fabsf")] = currentModule()->getFunction("fabsf"); - val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf"); - val[m_builtins->getFunction("sqrtf")] = currentModule()->getFunction("sqrtf"); - func = CloneFunction(originalFunc, val); -#if 0 - std::cout <<" replacing "<<m_builtins->getFunction("powf") - <<", with " <<currentModule()->getFunction("powf")<<std::endl; - std::cout<<"1111-------------------------------"<<std::endl; - checkFunction(originalFunc); - std::cout<<"2222-------------------------------"<<std::endl; - checkFunction(func); - std::cout <<"XXXX = " <<val[m_builtins->getFunction("powf")]<<std::endl; -#endif - currentModule()->getFunctionList().push_back(func); - } - if (op != TGSI_OPCODE_LAST) { - m_functions[op] = func; - } -} - - diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h deleted file mode 100644 index d6831e0a6b9..00000000000 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ /dev/null @@ -1,116 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INSTRUCTIONSSOA_H -#define INSTRUCTIONSSOA_H - -#include <pipe/p_shader_tokens.h> -#include <llvm/Support/IRBuilder.h> - -#include <map> -#include <vector> - -namespace llvm { - class Module; - class Function; - class BasicBlock; - class Value; -} -class StorageSoa; - -class InstructionsSoa -{ -public: - InstructionsSoa(llvm::Module *mod, llvm::Function *func, - llvm::BasicBlock *block, StorageSoa *storage); - - std::vector<llvm::Value*> abs(const std::vector<llvm::Value*> in1); - std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in); - std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2); - std::vector<llvm::Value*> dp3(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2); - std::vector<llvm::Value*> dp4(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2); - std::vector<llvm::Value*> lit(const std::vector<llvm::Value*> in); - std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2, - const std::vector<llvm::Value*> in3); - std::vector<llvm::Value*> max(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2); - std::vector<llvm::Value*> min(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2); - std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1, - const 
std::vector<llvm::Value*> in2); - std::vector<llvm::Value*> pow(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2); - std::vector<llvm::Value*> rsq(const std::vector<llvm::Value*> in1); - std::vector<llvm::Value*> slt(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2); - std::vector<llvm::Value*> sub(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2); - void end(); - - std::vector<llvm::Value*> extractVector(llvm::Value *vector); - llvm::IRBuilder<>* getIRBuilder(); -private: - const char * name(const char *prefix) const; - llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w); - void createFunctionMap(); - void createBuiltins(); - void createDependencies(); - llvm::Function *function(int); - llvm::Module *currentModule() const; - llvm::Value *allocaTemp(); - std::vector<llvm::Value*> allocaToResult(llvm::Value *allocaPtr); - std::vector<llvm::Value*> callBuiltin(llvm::Function *func, - const std::vector<llvm::Value*> in1); - std::vector<llvm::Value*> callBuiltin(llvm::Function *func, - const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2); - std::vector<llvm::Value*> callBuiltin(llvm::Function *func, - const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2, - const std::vector<llvm::Value*> in3); - void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST); -private: - llvm::IRBuilder<> m_builder; - StorageSoa *m_storage; - - std::map<int, std::string> m_functionsMap; - std::map<int, llvm::Function*> m_functions; - llvm::Module *m_builtins; - std::map<std::string, std::vector<std::string> > m_builtinDependencies; - -private: - mutable int m_idx; - mutable char m_name[32]; -}; - - -#endif diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c deleted file mode 100644 index d5a003a48b2..00000000000 --- 
a/src/gallium/auxiliary/gallivm/llvm_builtins.c +++ /dev/null @@ -1,114 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin [email protected] - */ -typedef __attribute__(( ext_vector_type(4) )) float float4; - -extern float powf(float a, float b); - -inline float approx(float a, float b) -{ - if (b < -128.0f) b = -128.0f; - if (b > 128.0f) b = 128.0f; - if (a < 0) a = 0; - return powf(a, b); -} - -inline float4 lit(float4 tmp) -{ - float4 result; - result.x = 1.0; - result.w = 1.0; - if (tmp.x > 0) { - result.y = tmp.x; - result.z = approx(tmp.y, tmp.w); - } else { - result.y = 0; - result.z = 0; - } - return result; -} - -inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2) -{ - float4 result; - - result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x; - result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y; - result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z; - result.w = (tmp0.w < 0.0) ? tmp1.w : tmp2.w; - - return result; -} - -extern float cosf(float val); -extern float sinf(float val); - -inline float4 vcos(float4 val) -{ - float4 result; - printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w); - result.x = cosf(val.x); - result.y = cosf(val.x); - result.z = cosf(val.x); - result.w = cosf(val.x); - printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w); - return result; -} - -inline float4 scs(float4 val) -{ - float4 result; - float tmp = val.x; - result.x = cosf(tmp); - result.y = sinf(tmp); - return result; -} - - -inline float4 vsin(float4 val) -{ - float4 result; - float tmp = val.x; - float res = sinf(tmp); - result.x = res; - result.y = res; - result.z = res; - result.w = res; - return result; -} - -inline int kil(float4 val) -{ - if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0) - return 1; - else - return 0; -} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.cpp b/src/gallium/auxiliary/gallivm/loweringpass.cpp deleted file mode 100644 index 556dbec3661..00000000000 --- a/src/gallium/auxiliary/gallivm/loweringpass.cpp +++ /dev/null @@ -1,17 
+0,0 @@ -#include "loweringpass.h" - -using namespace llvm; - -char LoweringPass::ID = 0; -RegisterPass<LoweringPass> X("lowering", "Lowering Pass"); - -LoweringPass::LoweringPass() - : ModulePass((intptr_t)&ID) -{ -} - -bool LoweringPass::runOnModule(Module &m) -{ - llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n"; - return false; -} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.h b/src/gallium/auxiliary/gallivm/loweringpass.h deleted file mode 100644 index f62dcf6ba73..00000000000 --- a/src/gallium/auxiliary/gallivm/loweringpass.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef LOWERINGPASS_H -#define LOWERINGPASS_H - -#include "llvm/Pass.h" -#include "llvm/Module.h" - -struct LoweringPass : public llvm::ModulePass -{ - static char ID; - LoweringPass(); - - virtual bool runOnModule(llvm::Module &m); -}; - -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/auxiliary/gallivm/lp_bld_alpha.c index 2b4bc5c819d..7245730350c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.c @@ -35,7 +35,6 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" -#include "lp_bld_arit.h" #include "lp_bld_logic.h" #include "lp_bld_flow.h" #include "lp_bld_debug.h" diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h index 634575670db..634575670db 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index eea6b5d6a5c..54b31befe6d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -629,7 +629,7 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - unsigned long absMask = ~(1 << (type.width - 1)); + unsigned long long absMask = ~(1ULL << 
(type.width - 1)); LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); a = LLVMBuildAnd(bld->builder, a, mask, ""); @@ -874,6 +874,9 @@ lp_build_iround(struct lp_build_context *bld, } +/** + * Convert float[] to int[] with floor(). + */ LLVMValueRef lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef a) @@ -900,6 +903,7 @@ lp_build_ifloor(struct lp_build_context *bld, sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + lp_build_name(sign, "floor.sign"); /* offset = -0.99999(9)f */ offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); @@ -908,11 +912,14 @@ lp_build_ifloor(struct lp_build_context *bld, /* offset = a < 0 ? -0.99999(9)f : 0.0f */ offset = LLVMBuildAnd(bld->builder, offset, sign, ""); offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); + lp_build_name(offset, "floor.offset"); res = LLVMBuildAdd(bld->builder, a, offset, ""); + lp_build_name(res, "floor.res"); } res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + lp_build_name(res, "floor"); return res; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 62be4b9aee1..62be4b9aee1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h index da272e549f3..da272e549f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c index ced7b9c11d7..0215bb72ac6 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend_aos.c @@ -44,6 +44,7 @@ #include "pipe/p_state.h" +#include "util/u_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -314,9 +315,10 @@ lp_build_blend_aos(LLVMBuilderRef builder, LLVMValueRef dst_term; /* FIXME */ - assert(blend->colormask == 0xf); + assert(blend->independent_blend_enable == 0); + assert(blend->rt[0].colormask == 0xf); - if(!blend->blend_enable) + if(!blend->rt[0].blend_enable) return src; /* It makes no sense to blend unless values are normalized */ @@ -333,14 +335,16 @@ lp_build_blend_aos(LLVMBuilderRef builder, * combinations it is possible to reorder the operations and therefore saving * some instructions. */ - src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle); - dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle); + src_term = lp_build_blend_factor(&bld, src, blend->rt[0].rgb_src_factor, + blend->rt[0].alpha_src_factor, alpha_swizzle); + dst_term = lp_build_blend_factor(&bld, dst, blend->rt[0].rgb_dst_factor, + blend->rt[0].alpha_dst_factor, alpha_swizzle); lp_build_name(src_term, "src_term"); lp_build_name(dst_term, "dst_term"); - if(blend->rgb_func == blend->alpha_func) { - return lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term); + if(blend->rt[0].rgb_func == blend->rt[0].alpha_func) { + return lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); } else { /* Seperate RGB / A functions */ @@ -348,8 +352,8 @@ lp_build_blend_aos(LLVMBuilderRef builder, LLVMValueRef rgb; LLVMValueRef alpha; - rgb = lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term); - alpha = lp_build_blend_func(&bld.base, blend->alpha_func, src_term, dst_term); + rgb = lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); + alpha = lp_build_blend_func(&bld.base, 
blend->rt[0].alpha_func, src_term, dst_term); return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c b/src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c index 88321f62a2c..1eac0a5c891 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend_logicop.c @@ -35,6 +35,7 @@ #include "pipe/p_state.h" +#include "util/u_debug.h" #include "lp_bld_blend.h" diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c index 9511299d558..6d5a45db7a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend_soa.c @@ -69,9 +69,9 @@ #include "pipe/p_state.h" +#include "util/u_debug.h" #include "lp_bld_type.h" -#include "lp_bld_const.h" #include "lp_bld_arit.h" #include "lp_bld_blend.h" @@ -218,7 +218,7 @@ lp_build_blend_soa(LLVMBuilderRef builder, } for (i = 0; i < 4; ++i) { - if (blend->colormask & (1 << i)) { + if (blend->rt[0].colormask & (1 << i)) { if (blend->logicop_enable) { if(!type.floating) { res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]); @@ -226,10 +226,10 @@ lp_build_blend_soa(LLVMBuilderRef builder, else res[i] = dst[i]; } - else if (blend->blend_enable) { - unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor; - unsigned dst_factor = i < 3 ? blend->rgb_dst_factor : blend->alpha_dst_factor; - unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func; + else if (blend->rt[0].blend_enable) { + unsigned src_factor = i < 3 ? blend->rt[0].rgb_src_factor : blend->rt[0].alpha_src_factor; + unsigned dst_factor = i < 3 ? blend->rt[0].rgb_dst_factor : blend->rt[0].alpha_dst_factor; + unsigned func = i < 3 ? 
blend->rt[0].rgb_func : blend->rt[0].alpha_func; boolean func_commutative = lp_build_blend_func_commutative(func); /* It makes no sense to blend unless values are normalized */ @@ -270,7 +270,7 @@ lp_build_blend_soa(LLVMBuilderRef builder, /* See if this function has been previously applied */ for(j = 0; j < i; ++j) { - unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func; + unsigned prev_func = j < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func; unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func); if((!func_reverse && diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index c8eaa8c3940..c8eaa8c3940 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index cb8e1c7b006..cb8e1c7b006 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 99352094379..f77cf787213 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -63,11 +63,9 @@ #include "util/u_debug.h" #include "util/u_math.h" -#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" -#include "lp_bld_intr.h" #include "lp_bld_arit.h" #include "lp_bld_pack.h" #include "lp_bld_conv.h" @@ -125,6 +123,10 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); /* TODO: Fill in the empty lower bits for additional precision? */ + /* YES: this fixes progs/trivial/tri-z-eq.c. + * Otherwise vertex Z=1.0 values get converted to something like + * 0xfffffb00 and the test for equality with 0xffffffff fails. 
+ */ #if 0 { LLVMValueRef msb; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h index 948e68fae4f..948e68fae4f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c index 39dfc51e503..39dfc51e503 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 583e6132b4b..583e6132b4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c index d438c0e63d7..d438c0e63d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.c diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h index 79d6981bb51..79d6981bb51 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index 25c10af29f0..bc831389085 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -41,13 +41,13 @@ #define LP_BUILD_FLOW_MAX_VARIABLES 32 #define LP_BUILD_FLOW_MAX_DEPTH 32 - /** * Enumeration of all possible flow constructs. 
*/ enum lp_build_flow_construct_kind { - lP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP + LP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SKIP, + LP_BUILD_FLOW_IF }; @@ -73,7 +73,21 @@ struct lp_build_flow_skip /** Number of variables declared at the beginning */ unsigned num_variables; - LLVMValueRef *phi; + LLVMValueRef *phi; /**< array [num_variables] */ +}; + + +/** + * if/else/endif. + */ +struct lp_build_flow_if +{ + unsigned num_variables; + + LLVMValueRef *phi; /**< array [num_variables] */ + + LLVMValueRef condition; + LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; }; @@ -84,6 +98,7 @@ union lp_build_flow_construct_data { struct lp_build_flow_scope scope; struct lp_build_flow_skip skip; + struct lp_build_flow_if ifthen; }; @@ -145,6 +160,10 @@ lp_build_flow_destroy(struct lp_build_flow_context *flow) } +/** + * Begin/push a new flow control construct, such as a loop, skip block + * or variable scope. + */ static union lp_build_flow_construct_data * lp_build_flow_push(struct lp_build_flow_context *flow, enum lp_build_flow_construct_kind kind) @@ -158,6 +177,10 @@ lp_build_flow_push(struct lp_build_flow_context *flow, } +/** + * Return the current/top flow control construct on the stack. + * \param kind the expected type of the top-most construct + */ static union lp_build_flow_construct_data * lp_build_flow_peek(struct lp_build_flow_context *flow, enum lp_build_flow_construct_kind kind) @@ -174,6 +197,10 @@ lp_build_flow_peek(struct lp_build_flow_context *flow, } +/** + * End/pop the current/top flow control construct on the stack. 
+ * \param kind the expected type of the top-most construct + */ static union lp_build_flow_construct_data * lp_build_flow_pop(struct lp_build_flow_context *flow, enum lp_build_flow_construct_kind kind) @@ -200,7 +227,7 @@ lp_build_flow_scope_begin(struct lp_build_flow_context *flow) { struct lp_build_flow_scope *scope; - scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope; + scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; if(!scope) return; @@ -213,11 +240,11 @@ lp_build_flow_scope_begin(struct lp_build_flow_context *flow) * * A variable is a named entity which can have different LLVMValueRef's at * different points of the program. This is relevant for control flow because - * when there are mutiple branches to a same location we need to replace + * when there are multiple branches to a same location we need to replace * the variable's value with a Phi function as explained in * http://en.wikipedia.org/wiki/Static_single_assignment_form . * - * We keep track of variables by keeping around a pointer to where their + * We keep track of variables by keeping around a pointer to where they're * current. * * There are a few cautions to observe: @@ -241,7 +268,7 @@ lp_build_flow_scope_declare(struct lp_build_flow_context *flow, { struct lp_build_flow_scope *scope; - scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope; + scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; if(!scope) return; @@ -263,7 +290,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) { struct lp_build_flow_scope *scope; - scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope; + scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; if(!scope) return; @@ -277,27 +304,47 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) } +/** + * Note: this function has no dependencies on the flow code and could + * be used elsewhere. 
+ */ static LLVMBasicBlockRef -lp_build_flow_insert_block(struct lp_build_flow_context *flow) +lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) { LLVMBasicBlockRef current_block; LLVMBasicBlockRef next_block; LLVMBasicBlockRef new_block; - current_block = LLVMGetInsertBlock(flow->builder); + /* get current basic block */ + current_block = LLVMGetInsertBlock(builder); + /* check if there's another block after this one */ next_block = LLVMGetNextBasicBlock(current_block); - if(next_block) { - new_block = LLVMInsertBasicBlock(next_block, ""); + if (next_block) { + /* insert the new block before the next block */ + new_block = LLVMInsertBasicBlock(next_block, name); } else { + /* append new block after current block */ LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - new_block = LLVMAppendBasicBlock(function, ""); + new_block = LLVMAppendBasicBlock(function, name); } return new_block; } + +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + return lp_build_insert_new_block(flow->builder, ""); +} + + +/** + * Begin a "skip" block. Inside this block we can test a condition and + * skip to the end of the block if the condition is false. 
+ */ void lp_build_flow_skip_begin(struct lp_build_flow_context *flow) { @@ -309,13 +356,16 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) if(!skip) return; + /* create new basic block */ skip->block = lp_build_flow_insert_block(flow); + skip->num_variables = flow->num_variables; if(!skip->num_variables) { skip->phi = NULL; return; } + /* Allocate a Phi node for each variable in this skip scope */ skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); if(!skip->phi) { skip->num_variables = 0; @@ -325,6 +375,7 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, skip->block); + /* create a Phi node for each variable */ for(i = 0; i < skip->num_variables; ++i) skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); @@ -332,6 +383,10 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) } +/** + * Insert code to test a condition and branch to the end of the current + * skip block if the condition is true. 
+ */ void lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, LLVMValueRef cond) @@ -349,15 +404,17 @@ lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, new_block = lp_build_flow_insert_block(flow); + /* for each variable, update the Phi node with a (variable, block) pair */ for(i = 0; i < skip->num_variables; ++i) { assert(*flow->variables[i]); LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1); } + /* if cond is true, goto skip->block, else goto new_block */ LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); LLVMPositionBuilderAtEnd(flow->builder, new_block); - } +} void @@ -373,12 +430,14 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) current_block = LLVMGetInsertBlock(flow->builder); + /* add (variable, block) tuples to the phi nodes */ for(i = 0; i < skip->num_variables; ++i) { assert(*flow->variables[i]); LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1); *flow->variables[i] = skip->phi[i]; } + /* goto block */ LLVMBuildBr(flow->builder, skip->block); LLVMPositionBuilderAtEnd(flow->builder, skip->block); @@ -386,22 +445,34 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) } +/** + * Check if the mask predicate is zero. If so, jump to the end of the block. + */ static void lp_build_mask_check(struct lp_build_mask_context *mask) { LLVMBuilderRef builder = mask->flow->builder; LLVMValueRef cond; + /* cond = (mask == 0) */ cond = LLVMBuildICmp(builder, LLVMIntEQ, LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), LLVMConstNull(mask->reg_type), ""); + /* if cond, goto end of block */ lp_build_flow_skip_cond_break(mask->flow, cond); } +/** + * Begin a section of code which is predicated on a mask.
+ * \param mask the mask context, initialized here + * \param flow the flow context + * \param type the type of the mask + * \param value storage for the mask + */ void lp_build_mask_begin(struct lp_build_mask_context *mask, struct lp_build_flow_context *flow, @@ -422,6 +493,11 @@ lp_build_mask_begin(struct lp_build_mask_context *mask, } +/** + * Update boolean mask with given value (bitwise AND). + * Typically used to update the quad's pixel alive/killed mask + * after depth testing, alpha testing, TGSI_OPCODE_KIL, etc. + */ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) @@ -432,6 +508,9 @@ lp_build_mask_update(struct lp_build_mask_context *mask, } +/** + * End section of code which is predicated on a mask. + */ LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask) { @@ -491,3 +570,188 @@ lp_build_loop_end(LLVMBuilderRef builder, LLVMPositionBuilderAtEnd(builder, after_block); } + + +/* + Example of if/then/else building: + + int x; + if (cond) { + x = 1 + 2; + } + else { + x = 2 + 3; + } + + Is built with: + + LLVMValueRef x = LLVMGetUndef(); // or something else + + flow = lp_build_flow_create(builder); + + lp_build_flow_scope_begin(flow); + + // x needs a phi node + lp_build_flow_scope_declare(flow, &x); + + lp_build_if(ctx, flow, builder, cond); + x = LLVMAdd(1, 2); + lp_build_else(ctx); + x = LLVMAdd(2, 3); + lp_build_endif(ctx); + + lp_build_flow_scope_end(flow); + + lp_build_flow_destroy(flow); + */ + + + +/** + * Begin an if/else/endif construct. 
+ */ +void +lp_build_if(struct lp_build_if_state *ctx, + struct lp_build_flow_context *flow, + LLVMBuilderRef builder, + LLVMValueRef condition) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + struct lp_build_flow_if *ifthen; + unsigned i; + + memset(ctx, 0, sizeof(*ctx)); + ctx->builder = builder; + ctx->flow = flow; + + /* push/create new scope */ + ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + ifthen->num_variables = flow->num_variables; + ifthen->condition = condition; + ifthen->entry_block = block; + + /* create a Phi node for each variable in this flow scope */ + ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); + if (!ifthen->phi) { + ifthen->num_variables = 0; + return; + } + + /* create endif/merge basic block for the phi functions */ + ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); + LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); + + /* create a phi node for each variable */ + for (i = 0; i < flow->num_variables; i++) { + ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + /* add add the initial value of the var from the entry block */ + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1); + } + + /* create/insert true_block before merge_block */ + ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); + + /* successive code goes into the true block */ + LLVMPositionBuilderAtEnd(builder, ifthen->true_block); +} + + +/** + * Begin else-part of a conditional + */ +void +lp_build_else(struct lp_build_if_state *ctx) +{ + struct lp_build_flow_context *flow = ctx->flow; + struct lp_build_flow_if *ifthen; + unsigned i; + + ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + /* for each variable, update the Phi node with a (variable, block) pair */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); + for (i = 0; i < flow->num_variables; 
i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); + } + + /* create/insert false_block before the merge block */ + ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); + + /* successive code goes into the else block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); +} + + +/** + * End a conditional. + */ +void +lp_build_endif(struct lp_build_if_state *ctx) +{ + struct lp_build_flow_context *flow = ctx->flow; + struct lp_build_flow_if *ifthen; + unsigned i; + + ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; + assert(ifthen); + + if (ifthen->false_block) { + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); + /* for each variable, update the Phi node with a (variable, block) pair */ + for (i = 0; i < flow->num_variables; i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); + + /* replace the variable ref with the phi function */ + *flow->variables[i] = ifthen->phi[i]; + } + } + else { + /* no else clause */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); + for (i = 0; i < flow->num_variables; i++) { + assert(*flow->variables[i]); + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); + + /* replace the variable ref with the phi function */ + *flow->variables[i] = ifthen->phi[i]; + } + } + + FREE(ifthen->phi); + + /*** + *** Now patch in the various branch instructions. 
+ ***/ + + /* Insert the conditional branch instruction at the end of entry_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block); + if (ifthen->false_block) { + /* we have an else clause */ + LLVMBuildCondBr(ctx->builder, ifthen->condition, + ifthen->true_block, ifthen->false_block); + } + else { + /* no else clause */ + LLVMBuildCondBr(ctx->builder, ifthen->condition, + ifthen->true_block, ifthen->merge_block); + } + + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); + LLVMBuildBr(ctx->builder, ifthen->merge_block); + if (ifthen->false_block) { + /* Append an unconditional Br(anch) instruction on the false_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + LLVMBuildBr(ctx->builder, ifthen->merge_block); + } + + + /* Resume building code at end of the ifthen->merge_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index e61999ff06b..4c225a0d4f9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -126,4 +126,26 @@ lp_build_loop_end(LLVMBuilderRef builder, + +struct lp_build_if_state +{ + LLVMBuilderRef builder; + struct lp_build_flow_context *flow; +}; + + +void +lp_build_if(struct lp_build_if_state *ctx, + struct lp_build_flow_context *flow, + LLVMBuilderRef builder, + LLVMValueRef condition); + +void +lp_build_else(struct lp_build_if_state *ctx); + +void +lp_build_endif(struct lp_build_if_state *ctx); + + + #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 970bee379f5..970bee379f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 10e82f120bb..dfa080b8533 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -38,7 +38,6 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" -#include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_format.h" diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/auxiliary/gallivm/lp_bld_format_query.c index f3832d07ff9..f3832d07ff9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_query.c diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 64151d169da..64151d169da 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/auxiliary/gallivm/lp_bld_interp.c index 49dab8ab61e..a6acaead887 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.c @@ -45,6 +45,36 @@ #include "lp_bld_interp.h" +/* + * The shader JIT function operates on blocks of quads. + * Each block has 2x2 quads and each quad has 2x2 pixels. + * + * We iterate over the quads in order 0, 1, 2, 3: + * + * ################# + * # | # | # + * #---0---#---1---# + * # | # | # + * ################# + * # | # | # + * #---2---#---3---# + * # | # | # + * ################# + * + * Within each quad, we have four pixels which are represented in SOA + * order: + * + * ######### + * # 0 | 1 # + * #---+---# + * # 2 | 3 # + * ######### + * + * So the green channel (for example) of the four pixels is stored in + * a single vector register: {g0, g1, g2, g3}. 
+ */ + + static void attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) { @@ -55,6 +85,10 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix } +/** + * Initialize the bld->a0, dadx, dady fields. This involves fetching + * those values from the arrays which are passed into the JIT function. + */ static void coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef a0_ptr, @@ -91,7 +125,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, case TGSI_INTERPOLATE_CONSTANT: a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); a0 = lp_build_broadcast_scalar(&bld->base, a0); - attrib_name(a0, attrib, chan, ".dady"); + attrib_name(a0, attrib, chan, ".a0"); break; default: @@ -109,30 +143,13 @@ coeffs_init(struct lp_build_interp_soa_context *bld, /** - * Multiply the dadx and dady with the xstep and ystep respectively. + * Emit LLVM code to compute the fragment shader input attribute values. + * For example, for a color input, we'll compute red, green, blue and alpha + * values for the four pixels in a quad. + * Recall that we're operating on 4-element vectors so each arithmetic + * operation is operating on the four pixels in a quad. 
*/ static void -coeffs_update(struct lp_build_interp_soa_context *bld) -{ - unsigned attrib; - unsigned chan; - - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - if (mode != TGSI_INTERPOLATE_CONSTANT) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep); - } - } - } - } -} - - -static void attribs_init(struct lp_build_interp_soa_context *bld) { LLVMValueRef x = bld->pos[0]; @@ -154,7 +171,9 @@ attribs_init(struct lp_build_interp_soa_context *bld) res = a0; if (mode != TGSI_INTERPOLATE_CONSTANT) { + /* res = res + x * dadx */ res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx)); + /* res = res + y * dady */ res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady)); } @@ -178,13 +197,19 @@ attribs_init(struct lp_build_interp_soa_context *bld) } +/** + * Increment the shader input attribute values. + * This is called when we move from one quad to the next. 
+ */ static void -attribs_update(struct lp_build_interp_soa_context *bld) +attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) { LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; + assert(quad_index < 4); + for(attrib = 0; attrib < bld->num_attribs; ++attrib) { unsigned mask = bld->mask[attrib]; unsigned mode = bld->mode[attrib]; @@ -198,13 +223,21 @@ attribs_update(struct lp_build_interp_soa_context *bld) res = bld->attribs_pre[attrib][chan]; - if(bld->xstep) + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad */ + /* build res = res + dadx + dadx */ + res = lp_build_add(&bld->base, res, dadx); res = lp_build_add(&bld->base, res, dadx); + } - if(bld->ystep) + if (quad_index == 2 || quad_index == 3) { + /* bottom-left or bottom-right quad */ + /* build res = res + dady + dady */ + res = lp_build_add(&bld->base, res, dady); res = lp_build_add(&bld->base, res, dady); + } - bld->attribs_pre[attrib][chan] = res; + //XXX bld->attribs_pre[attrib][chan] = res; if (mode == TGSI_INTERPOLATE_PERSPECTIVE) { LLVMValueRef w = bld->pos[3]; @@ -242,17 +275,32 @@ pos_init(struct lp_build_interp_soa_context *bld, } +/** + * Update quad position values when moving to the next quad. 
+ */ static void -pos_update(struct lp_build_interp_soa_context *bld) +pos_update(struct lp_build_interp_soa_context *bld, int quad_index) { LLVMValueRef x = bld->attribs[0][0]; LLVMValueRef y = bld->attribs[0][1]; + const int xstep = 2, ystep = 2; - if(bld->xstep) - x = lp_build_add(&bld->base, x, lp_build_const_scalar(bld->base.type, bld->xstep)); + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad in block */ + /* build x += xstep */ + x = lp_build_add(&bld->base, x, + lp_build_const_scalar(bld->base.type, xstep)); + } - if(bld->ystep) - y = lp_build_add(&bld->base, y, lp_build_const_scalar(bld->base.type, bld->ystep)); + if (quad_index == 2) { + /* bottom-left quad in block */ + /* build y += ystep */ + y = lp_build_add(&bld->base, y, + lp_build_const_scalar(bld->base.type, ystep)); + /* build x -= xstep */ + x = lp_build_sub(&bld->base, x, + lp_build_const_scalar(bld->base.type, xstep)); + } lp_build_name(x, "pos.x"); lp_build_name(y, "pos.y"); @@ -262,18 +310,20 @@ pos_update(struct lp_build_interp_soa_context *bld) } +/** + * Initialize fragment shader input attribute info. + */ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, + boolean flatshade, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, LLVMValueRef x0, - LLVMValueRef y0, - int xstep, - int ystep) + LLVMValueRef y0) { struct tgsi_parse_context parse; struct tgsi_full_declaration *decl; @@ -309,7 +359,15 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, for( attrib = first; attrib <= last; ++attrib ) { bld->mask[1 + attrib] = mask; - bld->mode[1 + attrib] = decl->Declaration.Interpolate; + + /* XXX: have mesa set INTERP_CONSTANT in the fragment + * shader. 
+ */ + if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + flatshade) + bld->mode[1 + attrib] = TGSI_INTERPOLATE_CONSTANT; + else + bld->mode[1 + attrib] = decl->Declaration.Interpolate; } bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1); @@ -331,21 +389,19 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); attribs_init(bld); - - bld->xstep = xstep; - bld->ystep = ystep; - - coeffs_update(bld); } /** - * Advance the position and inputs with the xstep and ystep. + * Advance the position and inputs to the given quad within the block. */ void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld) +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index) { - pos_update(bld); + assert(quad_index < 4); + + pos_update(bld, quad_index); - attribs_update(bld); + attribs_update(bld, quad_index); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h index 9c57a10879b..ca958cdf343 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h @@ -63,9 +63,6 @@ struct lp_build_interp_soa_context LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - int xstep; - int ystep; - /* Attribute values before perspective divide */ LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; @@ -82,18 +79,18 @@ struct lp_build_interp_soa_context void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, + boolean flatshade, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, LLVMValueRef x0, - LLVMValueRef y0, - int xstep, - int ystep); + LLVMValueRef y0); void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld); +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index); 
#endif /* LP_BLD_INTERP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c index 9895749d568..9895749d568 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_intr.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c diff --git a/src/gallium/drivers/llvmpipe/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index f813f27074b..f813f27074b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index db22a8028a6..41ac81b7441 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -34,6 +34,7 @@ #include "util/u_cpu_detect.h" +#include "util/u_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -41,13 +42,17 @@ #include "lp_bld_logic.h" +/** + * Build code to compare two values 'a' and 'b' of 'type' using the given func. 
+ * \param func one of PIPE_FUNC_x + */ LLVMValueRef -lp_build_cmp(struct lp_build_context *bld, - unsigned func, - LLVMValueRef a, - LLVMValueRef b) +lp_build_compare(LLVMBuilderRef builder, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) { - const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); @@ -56,6 +61,9 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef res; unsigned i; + assert(func >= PIPE_FUNC_NEVER); + assert(func <= PIPE_FUNC_ALWAYS); + if(func == PIPE_FUNC_NEVER) return zeros; if(func == PIPE_FUNC_ALWAYS) @@ -68,6 +76,7 @@ lp_build_cmp(struct lp_build_context *bld, #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating && util_cpu_caps.has_sse) { + /* float[4] comparison */ LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -96,7 +105,7 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } if(swap) { @@ -109,14 +118,15 @@ lp_build_cmp(struct lp_build_context *bld, } args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); - res = lp_build_intrinsic(bld->builder, + res = lp_build_intrinsic(builder, "llvm.x86.sse.cmp.ps", vec_type, args, 3); - res = LLVMBuildBitCast(bld->builder, res, int_vec_type, ""); + res = LLVMBuildBitCast(builder, res, int_vec_type, ""); return res; } else if(util_cpu_caps.has_sse2) { + /* int[4] comparison */ static const struct { unsigned swap:1; unsigned eq:1; @@ -152,7 +162,7 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } /* There are no signed byte and unsigned word/dword comparison @@ -162,8 +172,8 @@ lp_build_cmp(struct lp_build_context *bld, ((type.width == 8 && type.sign) || (type.width != 8 && !type.sign))) { LLVMValueRef msb = 
lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); - a = LLVMBuildXor(bld->builder, a, msb, ""); - b = LLVMBuildXor(bld->builder, b, msb, ""); + a = LLVMBuildXor(builder, a, msb, ""); + b = LLVMBuildXor(builder, b, msb, ""); } if(table[func].swap) { @@ -176,14 +186,14 @@ lp_build_cmp(struct lp_build_context *bld, } if(table[func].eq) - res = lp_build_intrinsic(bld->builder, pcmpeq, vec_type, args, 2); + res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); else if (table[func].gt) - res = lp_build_intrinsic(bld->builder, pcmpgt, vec_type, args, 2); + res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); else res = LLVMConstNull(vec_type); if(table[func].not) - res = LLVMBuildNot(bld->builder, res, ""); + res = LLVMBuildNot(builder, res, ""); return res; } @@ -219,28 +229,28 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } #if 0 /* XXX: Although valid IR, no LLVM target currently support this */ - cond = LLVMBuildFCmp(bld->builder, op, a, b, ""); - res = LLVMBuildSelect(bld->builder, cond, ones, zeros, ""); + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSelect(builder, cond, ones, zeros, ""); #else debug_printf("%s: warning: using slow element-wise vector comparison\n", __FUNCTION__); res = LLVMGetUndef(int_vec_type); for(i = 0; i < type.length; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildFCmp(bld->builder, op, - LLVMBuildExtractElement(bld->builder, a, index, ""), - LLVMBuildExtractElement(bld->builder, b, index, ""), + cond = LLVMBuildFCmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), ""); - cond = LLVMBuildSelect(bld->builder, cond, + cond = LLVMBuildSelect(builder, cond, LLVMConstExtractElement(ones, index), LLVMConstExtractElement(zeros, index), ""); - res = LLVMBuildInsertElement(bld->builder, res, cond, index, ""); + res 
= LLVMBuildInsertElement(builder, res, cond, index, ""); } #endif } @@ -267,28 +277,28 @@ lp_build_cmp(struct lp_build_context *bld, break; default: assert(0); - return bld->undef; + return lp_build_undef(type); } #if 0 /* XXX: Although valid IR, no LLVM target currently support this */ - cond = LLVMBuildICmp(bld->builder, op, a, b, ""); - res = LLVMBuildSelect(bld->builder, cond, ones, zeros, ""); + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSelect(builder, cond, ones, zeros, ""); #else - debug_printf("%s: warning: using slow element-wise vector comparison\n", + debug_printf("%s: warning: using slow element-wise int vector comparison\n", __FUNCTION__); res = LLVMGetUndef(int_vec_type); for(i = 0; i < type.length; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildICmp(bld->builder, op, - LLVMBuildExtractElement(bld->builder, a, index, ""), - LLVMBuildExtractElement(bld->builder, b, index, ""), + cond = LLVMBuildICmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), ""); - cond = LLVMBuildSelect(bld->builder, cond, + cond = LLVMBuildSelect(builder, cond, LLVMConstExtractElement(ones, index), LLVMConstExtractElement(zeros, index), ""); - res = LLVMBuildInsertElement(bld->builder, res, cond, index, ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); } #endif } @@ -297,6 +307,21 @@ lp_build_cmp(struct lp_build_context *bld, } + +/** + * Build code to compare two values 'a' and 'b' using the given func. 
+ * \param func one of PIPE_FUNC_x + */ +LLVMValueRef +lp_build_cmp(struct lp_build_context *bld, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + return lp_build_compare(bld->builder, bld->type, func, a, b); +} + + LLVMValueRef lp_build_select(struct lp_build_context *bld, LLVMValueRef mask, @@ -394,3 +419,15 @@ lp_build_select_aos(struct lp_build_context *bld, #endif } } + +LLVMValueRef +lp_build_alloca(struct lp_build_context *bld) +{ + const struct lp_type type = bld->type; + + if (type.length > 1) { /*vector*/ + return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), ""); + } else { /*scalar*/ + return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index d67500ef707..a399ebf39ef 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -46,6 +46,14 @@ struct lp_type; struct lp_build_context; +LLVMValueRef +lp_build_compare(LLVMBuilderRef builder, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b); + + /** * @param func is one of PIPE_FUNC_xxx */ @@ -68,5 +76,7 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef b, const boolean cond[4]); +LLVMValueRef +lp_build_alloca(struct lp_build_context *bld); #endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 6e79438ead0..6e79438ead0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h index 0e787e0b9cb..0e787e0b9cb 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 
bc360ad77ad..bc360ad77ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index fb2a34984a4..fb2a34984a4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 9003e108c1c..a133b56ac55 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -74,7 +74,6 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->compare_func = sampler->compare_func; } state->normalized_coords = sampler->normalized_coords; - state->prefilter = sampler->prefilter; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 8cb8210ca76..39edcf13d14 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -70,7 +70,6 @@ struct lp_sampler_static_state unsigned compare_mode:1; unsigned compare_func:3; unsigned normalized_coords:1; - unsigned prefilter:4; }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 5ee8d556a68..57c2b763e47 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -172,7 +172,7 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: /* FIXME */ - _debug_printf("warning: failed to translate texture wrap mode %s\n", + _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n", debug_dump_tex_wrap(wrap_mode, TRUE)); coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); @@ 
-201,9 +201,13 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, x = lp_build_ifloor(&bld->coord_bld, s); y = lp_build_ifloor(&bld->coord_bld, t); + lp_build_name(x, "tex.x.floor"); + lp_build_name(y, "tex.y.floor"); x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); + lp_build_name(x, "tex.x.wrapped"); + lp_build_name(y, "tex.y.wrapped"); lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); } @@ -588,7 +592,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, /* FIXME: respect static_state->min_mip_filter */; /* FIXME: respect static_state->mag_img_filter */; - /* FIXME: respect static_state->prefilter */; lp_build_sample_compare(&bld, p, texel); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/auxiliary/gallivm/lp_bld_struct.c index 3998ac374fe..3998ac374fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.c diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index 740392f5611..740392f5611 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 64e81f7b1fe..64e81f7b1fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index b9472127a63..b9472127a63 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index eddb7a83fa2..eddb7a83fa2 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index fb1eda4423b..a52c6c50288 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -47,13 +47,11 @@ #include "tgsi/tgsi_exec.h" #include "lp_bld_type.h" #include "lp_bld_const.h" -#include "lp_bld_intr.h" #include "lp_bld_arit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_tgsi.h" -#include "lp_bld_debug.h" #define LP_MAX_TEMPS 256 @@ -187,7 +185,7 @@ emit_fetch( break; case TGSI_FILE_TEMPORARY: - res = bld->temps[reg->Register.Index][swizzle]; + res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], ""); if(!res) return bld->base.undef; break; @@ -289,11 +287,13 @@ emit_store( switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - bld->outputs[reg->Register.Index][chan_index] = value; + LLVMBuildStore(bld->base.builder, value, + bld->outputs[reg->Register.Index][chan_index]); break; case TGSI_FILE_TEMPORARY: - bld->temps[reg->Register.Index][chan_index] = value; + LLVMBuildStore(bld->base.builder, value, + bld->temps[reg->Register.Index][chan_index]); break; case TGSI_FILE_ADDRESS: @@ -440,6 +440,42 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) return FALSE; } +static int +emit_declaration( + struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_declaration *decl) +{ + unsigned first = decl->Range.First; + unsigned last = decl->Range.Last; + unsigned idx, i; + + for (idx = first; idx <= last; ++idx) { + boolean ok; + + switch (decl->Declaration.File) { + case TGSI_FILE_TEMPORARY: + for (i = 0; i < NUM_CHANNELS; i++) + bld->temps[idx][i] = lp_build_alloca(&bld->base); + ok = TRUE; + break; + + case TGSI_FILE_OUTPUT: + for (i = 0; i < NUM_CHANNELS; i++) + bld->outputs[idx][i] = 
lp_build_alloca(&bld->base); + ok = TRUE; + break; + + default: + /* don't need to declare other vars */ + ok = TRUE; + } + + if (!ok) + return FALSE; + } + + return TRUE; +} static int emit_instruction( @@ -1431,6 +1467,10 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: /* Inputs already interpolated */ + { + if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration )) + _debug_printf("warning: failed to define LLVM variable\n"); + } break; case TGSI_TOKEN_TYPE_INSTRUCTION: diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index 1320a267214..8270cd057f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -157,6 +157,27 @@ lp_build_int_vec_type(struct lp_type type) } +/** + * Build int32[4] vector type + */ +LLVMTypeRef +lp_build_int32_vec4_type(void) +{ + struct lp_type t; + LLVMTypeRef type; + + memset(&t, 0, sizeof(t)); + t.floating = FALSE; /* integer (not floating point) values */ + t.sign = TRUE; /* values are signed */ + t.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + t.width = 32; /* 32-bit int */ + t.length = 4; /* 4 elements per vector */ + + type = lp_build_int_elem_type(t); + return LLVMVectorType(type, t.length); +} + + struct lp_type lp_int_type(struct lp_type type) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index 2fb233d335f..62ee05be4df 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -252,6 +252,10 @@ LLVMTypeRef lp_build_int_vec_type(struct lp_type type); +LLVMTypeRef +lp_build_int32_vec4_type(void); + + struct lp_type lp_int_type(struct lp_type type); diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c deleted file mode 100644 index cb85e1734ec..00000000000 --- 
a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ /dev/null @@ -1,210 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * This file is compiled with clang into the LLVM bitcode - * - * Authors: - * Zack Rusin [email protected] - */ -typedef __attribute__(( ext_vector_type(4) )) float float4; - - -extern float fabsf(float val); - -/* helpers */ - -float4 absvec(float4 vec) -{ - float4 res; - res.x = fabsf(vec.x); - res.y = fabsf(vec.y); - res.z = fabsf(vec.z); - res.w = fabsf(vec.w); - - return res; -} - -float4 maxvec(float4 a, float4 b) -{ - return (float4){(a.x > b.x) ? a.x : b.x, - (a.y > b.y) ? a.y : b.y, - (a.z > b.z) ? 
a.z : b.z, - (a.w > b.w) ? a.w : b.w}; -} - -float4 minvec(float4 a, float4 b) -{ - return (float4){(a.x < b.x) ? a.x : b.x, - (a.y < b.y) ? a.y : b.y, - (a.z < b.z) ? a.z : b.z, - (a.w < b.w) ? a.w : b.w}; -} - -extern float powf(float num, float p); -extern float sqrtf(float x); - -float4 powvec(float4 vec, float4 q) -{ - float4 p; - p.x = powf(vec.x, q.x); - p.y = powf(vec.y, q.y); - p.z = powf(vec.z, q.z); - p.w = powf(vec.w, q.w); - return p; -} - -float4 sqrtvec(float4 vec) -{ - float4 p; - p.x = sqrtf(vec.x); - p.y = sqrtf(vec.y); - p.z = sqrtf(vec.z); - p.w = sqrtf(vec.w); - return p; -} - -float4 sltvec(float4 v1, float4 v2) -{ - float4 p; - p.x = (v1.x < v2.x) ? 1.0 : 0.0; - p.y = (v1.y < v2.y) ? 1.0 : 0.0; - p.z = (v1.z < v2.z) ? 1.0 : 0.0; - p.w = (v1.w < v2.w) ? 1.0 : 0.0; - return p; -} - - -/* instructions */ - -void abs(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) -{ - res[0] = absvec(tmp0x); - res[1] = absvec(tmp0y); - res[2] = absvec(tmp0z); - res[3] = absvec(tmp0w); -} - -void dp3(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + - (tmp0z * tmp1z); - - res[0] = dot; - res[1] = dot; - res[2] = dot; - res[3] = dot; -} - -void dp4(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + - (tmp0z * tmp1z) + (tmp0w * tmp1w); - - res[0] = dot; - res[1] = dot; - res[2] = dot; - res[3] = dot; -} - -void lit(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) -{ - const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; - const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; - const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; - - res[0] = (float4){1.0, 1.0, 1.0, 1.0}; - if (tmp0x.x > 0) { - float4 tmpy = maxvec(tmp0y, zerovec); - float4 
tmpw = minvec(tmp0w, plus128); - tmpw = maxvec(tmpw, min128); - res[1] = tmp0x; - res[2] = powvec(tmpy, tmpw); - } else { - res[1] = zerovec; - res[2] = zerovec; - } - res[3] = (float4){1.0, 1.0, 1.0, 1.0}; -} - -void min(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - res[0] = minvec(tmp0x, tmp1x); - res[1] = minvec(tmp0y, tmp1y); - res[2] = minvec(tmp0z, tmp1z); - res[3] = minvec(tmp0w, tmp1w); -} - - -void max(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - res[0] = maxvec(tmp0x, tmp1x); - res[1] = maxvec(tmp0y, tmp1y); - res[2] = maxvec(tmp0z, tmp1z); - res[3] = maxvec(tmp0w, tmp1w); -} - -void pow(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - res[0] = powvec(tmp0x, tmp1x); - res[1] = res[0]; - res[2] = res[0]; - res[3] = res[0]; -} - -void rsq(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) -{ - const float4 onevec = (float4) {1., 1., 1., 1.}; - res[0] = onevec/sqrtvec(absvec(tmp0x)); - res[1] = onevec/sqrtvec(absvec(tmp0y)); - res[2] = onevec/sqrtvec(absvec(tmp0z)); - res[3] = onevec/sqrtvec(absvec(tmp0w)); -} - -void slt(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - res[0] = sltvec(tmp0x, tmp1x); - res[1] = sltvec(tmp0y, tmp1y); - res[2] = sltvec(tmp0z, tmp1z); - res[3] = sltvec(tmp0w, tmp1w); -} - diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp deleted file mode 100644 index 73df24c9769..00000000000 --- a/src/gallium/auxiliary/gallivm/storage.cpp +++ /dev/null @@ -1,364 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin [email protected] - */ -#ifdef MESA_LLVM - -#include "storage.h" - -#include "gallivm_p.h" - -#include "pipe/p_shader_tokens.h" -#include <llvm/BasicBlock.h> -#include <llvm/Module.h> -#include <llvm/Value.h> - -#include <llvm/CallingConv.h> -#include <llvm/Constants.h> -#include <llvm/DerivedTypes.h> -#include <llvm/InstrTypes.h> -#include <llvm/Instructions.h> - -using namespace llvm; - -Storage::Storage(llvm::BasicBlock *block, llvm::Value *input) - : m_block(block), - m_INPUT(input), - m_addrs(32), - m_idx(0) -{ - m_floatVecType = VectorType::get(Type::FloatTy, 4); - m_intVecType = VectorType::get(IntegerType::get(32), 4); - - m_undefFloatVec = UndefValue::get(m_floatVecType); - m_undefIntVec = UndefValue::get(m_intVecType); - m_extSwizzleVec = 0; - - m_numConsts = 0; -} - -//can only build vectors with all members in the [0, 9] range -llvm::Constant *Storage::shuffleMask(int vec) -{ - if (!m_extSwizzleVec) { - std::vector<Constant*> elems; - elems.push_back(ConstantFP::get(APFloat(0.f))); - elems.push_back(ConstantFP::get(APFloat(1.f))); - elems.push_back(ConstantFP::get(APFloat(0.f))); - elems.push_back(ConstantFP::get(APFloat(1.f))); - m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems); - } - - if (m_intVecs.find(vec) != m_intVecs.end()) { - return m_intVecs[vec]; - } - int origVec = vec; - Constant* const_vec = 0; - if (origVec == 0) { - const_vec = Constant::getNullValue(m_intVecType); - } else { - int x = gallivm_x_swizzle(vec); - int y = gallivm_y_swizzle(vec); - int z = gallivm_z_swizzle(vec); - int w = gallivm_w_swizzle(vec); - std::vector<Constant*> elems; - elems.push_back(constantInt(x)); - elems.push_back(constantInt(y)); - elems.push_back(constantInt(z)); - elems.push_back(constantInt(w)); - const_vec = ConstantVector::get(m_intVecType, elems); - } - - m_intVecs[origVec] = const_vec; - return const_vec; -} - 
-llvm::ConstantInt *Storage::constantInt(int idx) -{ - if (m_constInts.find(idx) != m_constInts.end()) { - return m_constInts[idx]; - } - ConstantInt *const_int = ConstantInt::get(APInt(32, idx)); - m_constInts[idx] = const_int; - return const_int; -} - -llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx) -{ - Value *val = element(InputsArg, idx, indIdx); - LoadInst *load = new LoadInst(val, name("input"), false, m_block); - load->setAlignment(8); - - return load; -} - -llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx) -{ - m_numConsts = ((idx + 1) > m_numConsts) ? (idx + 1) : m_numConsts; - - Value *elem = element(ConstsArg, idx, indIdx); - LoadInst *load = new LoadInst(elem, name("const"), false, m_block); - load->setAlignment(8); - return load; -} - -llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle) -{ - Constant *mask = shuffleMask(shuffle); - ShuffleVectorInst *res = - new ShuffleVectorInst(vec, m_extSwizzleVec, mask, - name("shuffle"), m_block); - return res; -} - - -llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx) -{ - Value *elem = element(TempsArg, idx, indIdx); - - LoadInst *load = new LoadInst(elem, name("temp"), false, m_block); - load->setAlignment(8); - - return load; -} - -void Storage::setTempElement(int idx, llvm::Value *val, int mask) -{ - if (mask != TGSI_WRITEMASK_XYZW) { - llvm::Value *templ = 0; - if (m_tempWriteMap[idx]) - templ = tempElement(idx); - val = maskWrite(val, mask, templ); - } - Value *elem = element(TempsArg, idx); - StoreInst *st = new StoreInst(val, elem, false, m_block); - st->setAlignment(8); - m_tempWriteMap[idx] = true; -} - -void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask) -{ - if (mask != TGSI_WRITEMASK_XYZW) { - llvm::Value *templ = 0; - if (m_destWriteMap[dstIdx]) - templ = outputElement(dstIdx); - val = maskWrite(val, mask, templ); - } - - Value *elem = element(DestsArg, dstIdx); - StoreInst *st = new StoreInst(val, elem, false, m_block); 
- st->setAlignment(8); - m_destWriteMap[dstIdx] = true; -} - -llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ) -{ - llvm::Value *dst = templ; - if (!dst) - dst = Constant::getNullValue(m_floatVecType); - if ((mask & TGSI_WRITEMASK_X)) { - llvm::Value *x = new ExtractElementInst(src, unsigned(0), - name("x"), m_block); - dst = InsertElementInst::Create(dst, x, unsigned(0), - name("dstx"), m_block); - } - if ((mask & TGSI_WRITEMASK_Y)) { - llvm::Value *y = new ExtractElementInst(src, unsigned(1), - name("y"), m_block); - dst = InsertElementInst::Create(dst, y, unsigned(1), - name("dsty"), m_block); - } - if ((mask & TGSI_WRITEMASK_Z)) { - llvm::Value *z = new ExtractElementInst(src, unsigned(2), - name("z"), m_block); - dst = InsertElementInst::Create(dst, z, unsigned(2), - name("dstz"), m_block); - } - if ((mask & TGSI_WRITEMASK_W)) { - llvm::Value *w = new ExtractElementInst(src, unsigned(3), - name("w"), m_block); - dst = InsertElementInst::Create(dst, w, unsigned(3), - name("dstw"), m_block); - } - return dst; -} - -const char * Storage::name(const char *prefix) -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -int Storage::numConsts() const -{ - return m_numConsts; -} - -llvm::Value * Storage::addrElement(int idx) const -{ - Value *ret = m_addrs[idx]; - if (!ret) - return m_undefFloatVec; - return ret; -} - -void Storage::setAddrElement(int idx, llvm::Value *val, int mask) -{ - if (mask != TGSI_WRITEMASK_XYZW) { - llvm::Value *templ = m_addrs[idx]; - val = maskWrite(val, mask, templ); - } - m_addrs[idx] = val; -} - -llvm::Value * Storage::extractIndex(llvm::Value *vec) -{ - llvm::Value *x = new ExtractElementInst(vec, unsigned(0), - name("x"), m_block); - return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block); -} - -void Storage::setCurrentBlock(llvm::BasicBlock *block) -{ - m_block = block; -} - -llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx) -{ - Value *elem = 
element(DestsArg, idx, indIdx); - LoadInst *load = new LoadInst(elem, name("output"), false, m_block); - load->setAlignment(8); - - return load; -} - -llvm::Value * Storage::inputPtr() const -{ - return m_INPUT; -} - -void Storage::pushArguments(llvm::Value *input) -{ - m_argStack.push(m_INPUT); - - m_INPUT = input; -} - -void Storage::popArguments() -{ - m_INPUT = m_argStack.top(); - m_argStack.pop(); -} - -void Storage::pushTemps() -{ - m_extSwizzleVec = 0; -} - -void Storage::popTemps() -{ -} - -llvm::Value * Storage::immediateElement(int idx) -{ - return m_immediates[idx]; -} - -void Storage::addImmediate(float *val) -{ - std::vector<Constant*> vec(4); - vec[0] = ConstantFP::get(APFloat(val[0])); - vec[1] = ConstantFP::get(APFloat(val[1])); - vec[2] = ConstantFP::get(APFloat(val[2])); - vec[3] = ConstantFP::get(APFloat(val[3])); - m_immediates.push_back(ConstantVector::get(m_floatVecType, vec)); -} - - -llvm::Value * Storage::elemPtr(Args arg) -{ - std::vector<Value*> indices; - indices.push_back(constantInt(0)); - indices.push_back(constantInt(static_cast<int>(arg))); - GetElementPtrInst *getElem = GetElementPtrInst::Create(m_INPUT, - indices.begin(), - indices.end(), - name("input_ptr"), - m_block); - return new LoadInst(getElem, name("input_field"), false, m_block); -} - -llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, - llvm::Value *indIdx ) -{ - GetElementPtrInst *getElem = 0; - - if (indIdx) { - getElem = GetElementPtrInst::Create(ptr, - BinaryOperator::Create(Instruction::Add, - indIdx, - constantInt(idx), - name("add"), - m_block), - name("field"), - m_block); - } else { - getElem = GetElementPtrInst::Create(ptr, - constantInt(idx), - name("field"), - m_block); - } - return getElem; -} - -llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx ) -{ - Value *val = elemPtr(arg); - return elemIdx(val, idx, indIdx); -} - -void Storage::setKilElement(llvm::Value *val) -{ - std::vector<Value*> indices; - 
indices.push_back(constantInt(0)); - indices.push_back(constantInt(static_cast<int>(KilArg))); - GetElementPtrInst *elem = GetElementPtrInst::Create(m_INPUT, - indices.begin(), - indices.end(), - name("kil_ptr"), - m_block); - StoreInst *st = new StoreInst(val, elem, false, m_block); - st->setAlignment(8); -} - -#endif //MESA_LLVM - - diff --git a/src/gallium/auxiliary/gallivm/storage.h b/src/gallium/auxiliary/gallivm/storage.h deleted file mode 100644 index 8574f7554e3..00000000000 --- a/src/gallium/auxiliary/gallivm/storage.h +++ /dev/null @@ -1,133 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin [email protected] - */ - -#ifndef STORAGE_H -#define STORAGE_H - -#include <map> -#include <set> -#include <stack> -#include <vector> - -namespace llvm { - class BasicBlock; - class Constant; - class ConstantInt; - class LoadInst; - class Value; - class VectorType; -} - -class Storage -{ -public: - Storage(llvm::BasicBlock *block, - llvm::Value *input); - - llvm::Value *inputPtr() const; - - void setCurrentBlock(llvm::BasicBlock *block); - - llvm::ConstantInt *constantInt(int); - llvm::Constant *shuffleMask(int vec); - llvm::Value *inputElement(int idx, llvm::Value *indIdx =0); - llvm::Value *constElement(int idx, llvm::Value *indIdx =0); - llvm::Value *outputElement(int idx, llvm::Value *indIdx =0); - llvm::Value *tempElement(int idx, llvm::Value *indIdx =0); - llvm::Value *immediateElement(int idx); - - void setOutputElement(int dstIdx, llvm::Value *val, int mask); - void setTempElement(int idx, llvm::Value *val, int mask); - - llvm::Value *addrElement(int idx) const; - void setAddrElement(int idx, llvm::Value *val, int mask); - - void setKilElement(llvm::Value *val); - - llvm::Value *shuffleVector(llvm::Value *vec, int shuffle); - - llvm::Value *extractIndex(llvm::Value *vec); - - int numConsts() const; - - void pushArguments(llvm::Value *input); - void popArguments(); - void pushTemps(); - void popTemps(); - - void addImmediate(float *val); - -private: - llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ); - const char *name(const char *prefix); - - enum Args { - DestsArg = 0, - InputsArg = 1, - TempsArg = 2, - ConstsArg = 3, - KilArg = 4 - }; - llvm::Value *elemPtr(Args arg); - llvm::Value *elemIdx(llvm::Value *ptr, int idx, - llvm::Value *indIdx = 0); - llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0); - -private: - llvm::BasicBlock *m_block; - llvm::Value *m_INPUT; - - std::map<int, llvm::ConstantInt*> 
m_constInts; - std::map<int, llvm::Constant*> m_intVecs; - std::vector<llvm::Value*> m_addrs; - std::vector<llvm::Constant*> m_immediates; - - llvm::VectorType *m_floatVecType; - llvm::VectorType *m_intVecType; - - char m_name[32]; - int m_idx; - - int m_numConsts; - - std::map<int, bool > m_destWriteMap; - std::map<int, bool > m_tempWriteMap; - - llvm::Value *m_undefFloatVec; - llvm::Value *m_undefIntVec; - llvm::Value *m_extSwizzleVec; - - std::stack<llvm::Value*> m_argStack; - std::stack<std::vector<llvm::Value*> > m_tempStack; -}; - -#endif diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp deleted file mode 100644 index 4984ce985c6..00000000000 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ /dev/null @@ -1,438 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "storagesoa.h" - -#include "gallivm_p.h" - -#include "pipe/p_shader_tokens.h" -#include "util/u_debug.h" - -#include <llvm/BasicBlock.h> -#include <llvm/Module.h> -#include <llvm/Value.h> - -#include <llvm/CallingConv.h> -#include <llvm/Constants.h> -#include <llvm/DerivedTypes.h> -#include <llvm/InstrTypes.h> -#include <llvm/Instructions.h> - -using namespace llvm; - - -StorageSoa::StorageSoa(llvm::BasicBlock *block, - llvm::Value *input, - llvm::Value *output, - llvm::Value *consts) - : m_block(block), - m_input(input), - m_output(output), - m_consts(consts), - m_immediates(0), - m_idx(0) -{ -} - -void StorageSoa::addImmediate(float *vec) -{ - std::vector<float> vals(4); - vals[0] = vec[0]; - vals[1] = vec[1]; - vals[2] = vec[2]; - vals[3] = vec[3]; - m_immediatesToFlush.push_back(vals); -} - -void StorageSoa::declareImmediates() -{ - if (m_immediatesToFlush.empty()) - return; - - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - ArrayType *vectorChannels = ArrayType::get(vectorType, 4); - ArrayType *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size()); - - m_immediates = new GlobalVariable( - /*Type=*/arrayType, - /*isConstant=*/false, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name("immediates"), - currentModule()); - - std::vector<Constant*> arrayVals; - for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) { - std::vector<float> vec = m_immediatesToFlush[i]; - std::vector<float> vals(4); - std::vector<Constant*> channelArray; - - vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; 
vals[3] = vec[3]; - llvm::Constant *xChannel = createConstGlobalVector(vals); - - vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; - llvm::Constant *yChannel = createConstGlobalVector(vals); - - vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2]; - llvm::Constant *zChannel = createConstGlobalVector(vals); - - vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3]; - llvm::Constant *wChannel = createConstGlobalVector(vals); - channelArray.push_back(xChannel); - channelArray.push_back(yChannel); - channelArray.push_back(zChannel); - channelArray.push_back(wChannel); - Constant *constChannels = ConstantArray::get(vectorChannels, - channelArray); - arrayVals.push_back(constChannels); - } - Constant *constArray = ConstantArray::get(arrayType, arrayVals); - m_immediates->setInitializer(constArray); - - m_immediatesToFlush.clear(); -} - -llvm::Value *StorageSoa::addrElement(int idx) const -{ - std::map<int, llvm::Value*>::const_iterator itr = m_addresses.find(idx); - if (itr == m_addresses.end()) { - debug_printf("Trying to access invalid shader 'address'\n"); - return 0; - } - llvm::Value * res = (*itr).second; - - res = new LoadInst(res, name("addr"), false, m_block); - - return res; -} - -std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx) -{ - std::vector<llvm::Value*> res(4); - - res[0] = element(m_input, idx, 0); - res[1] = element(m_input, idx, 1); - res[2] = element(m_input, idx, 2); - res[3] = element(m_input, idx, 3); - - return res; -} - -llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc) -{ - std::vector<llvm::Value*> x(4); - x[0] = m_builder->CreateExtractElement(vector, - constantInt(cc), - name("x")); - - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - Constant *constVector = Constant::getNullValue(vectorType); - Value *res = m_builder->CreateInsertElement(constVector, x[0], - constantInt(0), - name("vecx")); - res = 
m_builder->CreateInsertElement(res, x[0], constantInt(1), - name("vecxx")); - res = m_builder->CreateInsertElement(res, x[0], constantInt(2), - name("vecxxx")); - res = m_builder->CreateInsertElement(res, x[0], constantInt(3), - name("vecxxxx")); - return res; -} - -std::vector<llvm::Value*> StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx) -{ - llvm::Value* res; - std::vector<llvm::Value*> res2(4); - llvm::Value *xChannel; - - xChannel = elementPointer(m_consts, idx, 0); - - res = alignedArrayLoad(xChannel); - - res2[0]=unpackConstElement(m_builder, res,0); - res2[1]=unpackConstElement(m_builder, res,1); - res2[2]=unpackConstElement(m_builder, res,2); - res2[3]=unpackConstElement(m_builder, res,3); - - return res2; -} - -std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx) -{ - std::vector<llvm::Value*> res(4); - - res[0] = element(m_output, idx, 0); - res[1] = element(m_output, idx, 1); - res[2] = element(m_output, idx, 2); - res[3] = element(m_output, idx, 3); - - return res; -} - -std::vector<llvm::Value*> StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx) -{ - std::vector<llvm::Value*> res(4); - llvm::Value *temp = m_temps[idx]; - - res[0] = element(temp, constantInt(0), 0); - res[1] = element(temp, constantInt(0), 1); - res[2] = element(temp, constantInt(0), 2); - res[3] = element(temp, constantInt(0), 3); - - return res; -} - -std::vector<llvm::Value*> StorageSoa::immediateElement(llvm::Value *idx) -{ - std::vector<llvm::Value*> res(4); - - res[0] = element(m_immediates, idx, 0); - res[1] = element(m_immediates, idx, 1); - res[2] = element(m_immediates, idx, 2); - res[3] = element(m_immediates, idx, 3); - - return res; -} - -llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index, - int channel) const -{ - std::vector<Value*> indices; - if (m_immediates == ptr) - indices.push_back(constantInt(0)); - indices.push_back(index); - indices.push_back(constantInt(channel)); - - 
GetElementPtrInst *getElem = GetElementPtrInst::Create(ptr, - indices.begin(), - indices.end(), - name("ptr"), - m_block); - return getElem; -} - -llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index, - int channel) const -{ - llvm::Value *res = elementPointer(ptr, index, channel); - LoadInst *load = new LoadInst(res, name("element"), false, m_block); - //load->setAlignment(8); - return load; -} - -const char * StorageSoa::name(const char *prefix) const -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -llvm::ConstantInt * StorageSoa::constantInt(int idx) const -{ - if (m_constInts.find(idx) != m_constInts.end()) { - return m_constInts[idx]; - } - ConstantInt *constInt = ConstantInt::get(APInt(32, idx)); - m_constInts[idx] = constInt; - return constInt; -} - -llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - PointerType *vectorPtr = PointerType::get(vectorType, 0); - - CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block); - LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block); - load->setAlignment(8); - return load; -} - -llvm::Module * StorageSoa::currentModule() const -{ - if (!m_block || !m_block->getParent()) - return 0; - - return m_block->getParent()->getParent(); -} - -llvm::Constant * StorageSoa::createConstGlobalFloat(const float val) -{ - Constant*c = ConstantFP::get(APFloat(val)); - return c; -} - -llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - std::vector<Constant*> immValues; - ConstantFP *constx = ConstantFP::get(APFloat(vec[0])); - ConstantFP *consty = ConstantFP::get(APFloat(vec[1])); - ConstantFP *constz = ConstantFP::get(APFloat(vec[2])); - ConstantFP *constw = ConstantFP::get(APFloat(vec[3])); - immValues.push_back(constx); - immValues.push_back(consty); - 
immValues.push_back(constz); - immValues.push_back(constw); - Constant *constVector = ConstantVector::get(vectorType, immValues); - - return constVector; -} - -std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle, - llvm::IRBuilder<>* m_builder,llvm::Value *indIdx) -{ - std::vector<llvm::Value*> val(4); - - //if we have an indirect index, always use that - // if not use the integer offset to create one - llvm::Value *realIndex = 0; - if (indIdx) - realIndex = indIdx; - else - realIndex = constantInt(idx); - debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx); - - switch(type) { - case TGSI_FILE_INPUT: - val = inputElement(realIndex); - break; - case TGSI_FILE_OUTPUT: - val = outputElement(realIndex); - break; - case TGSI_FILE_TEMPORARY: - val = tempElement(m_builder, idx); - break; - case TGSI_FILE_CONSTANT: - val = constElement(m_builder, realIndex); - break; - case TGSI_FILE_IMMEDIATE: - val = immediateElement(realIndex); - break; - case TGSI_FILE_ADDRESS: - debug_printf("Address not handled in the load phase!\n"); - assert(0); - break; - default: - debug_printf("Unknown load!\n"); - assert(0); - break; - } - if (!gallivm_is_swizzle(swizzle)) - return val; - - std::vector<llvm::Value*> res(4); - - res[0] = val[gallivm_x_swizzle(swizzle)]; - res[1] = val[gallivm_y_swizzle(swizzle)]; - res[2] = val[gallivm_z_swizzle(swizzle)]; - res[3] = val[gallivm_w_swizzle(swizzle)]; - return res; -} - -llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder) -{ - VectorType *vector = VectorType::get(Type::FloatTy, 4); - ArrayType *vecArray = ArrayType::get(vector, 4); - AllocaInst *alloca = new AllocaInst(vecArray, "temp", - m_builder->GetInsertBlock()); - - return alloca; -} - - -void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, - int mask, llvm::IRBuilder<>* m_builder) -{ - llvm::Value *out = 0; - llvm::Value *realIndex = 0; - switch(type) { - case 
TGSI_FILE_OUTPUT: - out = m_output; - realIndex = constantInt(idx); - break; - case TGSI_FILE_TEMPORARY: - // if that temp doesn't already exist, alloca it - if (m_temps.find(idx) == m_temps.end()) - m_temps[idx] = allocaTemp(m_builder); - - out = m_temps[idx]; - - realIndex = constantInt(0); - break; - case TGSI_FILE_INPUT: - out = m_input; - realIndex = constantInt(idx); - break; - case TGSI_FILE_ADDRESS: { - llvm::Value *addr = m_addresses[idx]; - if (!addr) { - addAddress(idx); - addr = m_addresses[idx]; - assert(addr); - } - new StoreInst(val[0], addr, false, m_block); - return; - break; - } - default: - debug_printf("Can't save output of this type: %d !\n", type); - assert(0); - break; - } - if ((mask & TGSI_WRITEMASK_X)) { - llvm::Value *xChannel = elementPointer(out, realIndex, 0); - new StoreInst(val[0], xChannel, false, m_block); - } - if ((mask & TGSI_WRITEMASK_Y)) { - llvm::Value *yChannel = elementPointer(out, realIndex, 1); - new StoreInst(val[1], yChannel, false, m_block); - } - if ((mask & TGSI_WRITEMASK_Z)) { - llvm::Value *zChannel = elementPointer(out, realIndex, 2); - new StoreInst(val[2], zChannel, false, m_block); - } - if ((mask & TGSI_WRITEMASK_W)) { - llvm::Value *wChannel = elementPointer(out, realIndex, 3); - new StoreInst(val[3], wChannel, false, m_block); - } -} - -void StorageSoa::addAddress(int idx) -{ - GlobalVariable *val = new GlobalVariable( - /*Type=*/IntegerType::get(32), - /*isConstant=*/false, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name("address"), - currentModule()); - val->setInitializer(Constant::getNullValue(IntegerType::get(32))); - - debug_printf("adding to %d\n", idx); - m_addresses[idx] = val; -} diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h deleted file mode 100644 index 56886f85e7a..00000000000 --- a/src/gallium/auxiliary/gallivm/storagesoa.h +++ /dev/null @@ -1,107 +0,0 @@ 
-/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef STORAGESOA_H -#define STORAGESOA_H - -#include <pipe/p_shader_tokens.h> -#include <llvm/Support/IRBuilder.h> - -#include <vector> -#include <list> -#include <map> - -namespace llvm { - class BasicBlock; - class Constant; - class ConstantInt; - class GlobalVariable; - class LoadInst; - class Value; - class VectorType; - class Module; -} - -class StorageSoa -{ -public: - StorageSoa(llvm::BasicBlock *block, - llvm::Value *input, - llvm::Value *output, - llvm::Value *consts); - - - std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle, - llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0); - void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, - int mask, llvm::IRBuilder<>* m_builder); - - void addImmediate(float *vec); - void declareImmediates(); - - void addAddress(int idx); - - llvm::Value * addrElement(int idx) const; - - llvm::ConstantInt *constantInt(int) const; -private: - llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx, - int channel) const; - llvm::Value *element(llvm::Value *ptr, llvm::Value *idx, - int channel) const; - const char *name(const char *prefix) const; - llvm::Value *alignedArrayLoad(llvm::Value *val); - llvm::Module *currentModule() const; - llvm::Constant *createConstGlobalFloat(const float val); - llvm::Constant *createConstGlobalVector(const std::vector<float> &vec); - - std::vector<llvm::Value*> inputElement(llvm::Value *indIdx); - llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc); - std::vector<llvm::Value*> constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx); - std::vector<llvm::Value*> outputElement(llvm::Value *indIdx); - std::vector<llvm::Value*> tempElement(llvm::IRBuilder<>* m_builder, int idx); - std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx); -private: - llvm::BasicBlock *m_block; - - llvm::Value 
*m_input; - llvm::Value *m_output; - llvm::Value *m_consts; - std::map<int, llvm::Value*> m_temps; - llvm::GlobalVariable *m_immediates; - - std::map<int, llvm::Value*> m_addresses; - - std::vector<std::vector<float> > m_immediatesToFlush; - llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder); - - mutable std::map<int, llvm::ConstantInt*> m_constInts; - mutable char m_name[32]; - mutable int m_idx; -}; - -#endif diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp deleted file mode 100644 index 8f7d3b71004..00000000000 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ /dev/null @@ -1,1136 +0,0 @@ -#include "tgsitollvm.h" - -#include "gallivm.h" -#include "gallivm_p.h" - -#include "storage.h" -#include "instructions.h" -#include "storagesoa.h" -#include "instructionssoa.h" - -#include "pipe/p_shader_tokens.h" - -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_build.h" -#include "tgsi/tgsi_dump.h" - - -#include <llvm/Module.h> -#include <llvm/CallingConv.h> -#include <llvm/Constants.h> -#include <llvm/DerivedTypes.h> -#include <llvm/Instructions.h> -#include <llvm/ModuleProvider.h> -#include <llvm/Pass.h> -#include <llvm/PassManager.h> -#include <llvm/Attributes.h> -#include <llvm/Support/PatternMatch.h> -#include <llvm/ExecutionEngine/JIT.h> -#include <llvm/ExecutionEngine/Interpreter.h> -#include <llvm/ExecutionEngine/GenericValue.h> -#include <llvm/Support/MemoryBuffer.h> -#include <llvm/LinkAllPasses.h> -#include <llvm/Analysis/Verifier.h> -#include <llvm/Analysis/LoopPass.h> -#include <llvm/Target/TargetData.h> -#include <llvm/Bitcode/ReaderWriter.h> -#include <llvm/Transforms/Utils/Cloning.h> - - -#include <sstream> -#include <fstream> -#include <iostream> - -using namespace llvm; - -static inline FunctionType *vertexShaderFunctionType() -{ - //Function takes three arguments, - // the calling code has to make sure the types it will - // pass 
are castable to the following: - // [4 x <4 x float>] inputs, - // [4 x <4 x float>] output, - // [4 x [1 x float]] consts, - - std::vector<const Type*> funcArgs; - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - ArrayType *vectorArray = ArrayType::get(vectorType, 4); - PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0); - - ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4); - ArrayType *constsArray = ArrayType::get(floatArray, 1); - PointerType *constsArrayPtr = PointerType::get(constsArray, 0); - - funcArgs.push_back(vectorArrayPtr);//inputs - funcArgs.push_back(vectorArrayPtr);//output - funcArgs.push_back(constsArrayPtr);//consts - - FunctionType *functionType = FunctionType::get( - /*Result=*/Type::VoidTy, - /*Params=*/funcArgs, - /*isVarArg=*/false); - - return functionType; -} - -static inline void -add_interpolator(struct gallivm_ir *ir, - struct gallivm_interpolate *interp) -{ - ir->interpolators[ir->num_interp] = *interp; - ++ir->num_interp; -} - -static void -translate_declaration(struct gallivm_ir *prog, - llvm::Module *module, - Storage *storage, - struct tgsi_full_declaration *decl, - struct tgsi_full_declaration *fd) -{ - if (decl->Declaration.File == TGSI_FILE_INPUT) { - unsigned first, last, mask; - uint interp_method; - - first = decl->Range.First; - last = decl->Range.Last; - mask = decl->Declaration.UsageMask; - - /* Do not touch WPOS.xy */ - if (first == 0) { - mask &= ~TGSI_WRITEMASK_XY; - if (mask == TGSI_WRITEMASK_NONE) { - first++; - if (first > last) { - return; - } - } - } - - interp_method = decl->Declaration.Interpolate; - - if (mask == TGSI_WRITEMASK_XYZW) { - unsigned i, j; - - for (i = first; i <= last; i++) { - for (j = 0; j < NUM_CHANNELS; j++) { - //interp( mach, i, j ); - struct gallivm_interpolate interp; - interp.type = interp_method; - interp.attrib = i; - interp.chan = j; - add_interpolator(prog, &interp); - } - } - } else { - unsigned i, j; - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & 
(1 << j) ) { - for( i = first; i <= last; i++ ) { - struct gallivm_interpolate interp; - interp.type = interp_method; - interp.attrib = i; - interp.chan = j; - add_interpolator(prog, &interp); - } - } - } - } - } -} - -static void -translate_declarationir(struct gallivm_ir *, - llvm::Module *, - StorageSoa *storage, - struct tgsi_full_declaration *decl, - struct tgsi_full_declaration *) -{ - if (decl->Declaration.File == TGSI_FILE_ADDRESS) { - int idx = decl->Range.First; - storage->addAddress(idx); - } -} - -static void -translate_immediate(Storage *storage, - struct tgsi_full_immediate *imm) -{ - float vec[4]; - int i; - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - vec[i] = imm->u[i].Float; - break; - default: - assert(0); - } - } - storage->addImmediate(vec); -} - - -static void -translate_immediateir(StorageSoa *storage, - struct tgsi_full_immediate *imm) -{ - float vec[4]; - int i; - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - vec[i] = imm->u[i].Float; - break; - default: - assert(0); - } - } - storage->addImmediate(vec); -} - -static inline int -swizzleInt(struct tgsi_full_src_register *src) -{ - int swizzle = 0; - int start = 1000; - - for (int k = 0; k < 4; ++k) { - swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start; - start /= 10; - } - return swizzle; -} - -static inline llvm::Value * -swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src, - Storage *storage) -{ - int swizzle = swizzleInt(src); - - if (gallivm_is_swizzle(swizzle)) { - /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/ - val = storage->shuffleVector(val, swizzle); - } - return val; -} - -static void -translate_instruction(llvm::Module *module, - Storage *storage, - Instructions *instr, - struct tgsi_full_instruction 
*inst, - struct tgsi_full_instruction *fi, - unsigned instno) -{ - llvm::Value *inputs[4]; - inputs[0] = 0; - inputs[1] = 0; - inputs[2] = 0; - inputs[3] = 0; - - for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->Src[i]; - llvm::Value *val = 0; - llvm::Value *indIdx = 0; - - if (src->Register.Indirect) { - indIdx = storage->addrElement(src->Indirect.Index); - indIdx = storage->extractIndex(indIdx); - } - if (src->Register.File == TGSI_FILE_CONSTANT) { - val = storage->constElement(src->Register.Index, indIdx); - } else if (src->Register.File == TGSI_FILE_INPUT) { - val = storage->inputElement(src->Register.Index, indIdx); - } else if (src->Register.File == TGSI_FILE_TEMPORARY) { - val = storage->tempElement(src->Register.Index); - } else if (src->Register.File == TGSI_FILE_OUTPUT) { - val = storage->outputElement(src->Register.Index, indIdx); - } else if (src->Register.File == TGSI_FILE_IMMEDIATE) { - val = storage->immediateElement(src->Register.Index); - } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->Register.File); - return; - } - - inputs[i] = swizzleVector(val, src, storage); - } - - /*if (inputs[0]) - instr->printVector(inputs[0]); - if (inputs[1]) - instr->printVector(inputs[1]);*/ - llvm::Value *out = 0; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: { - out = instr->arl(inputs[0]); - } - break; - case TGSI_OPCODE_MOV: { - out = inputs[0]; - } - break; - case TGSI_OPCODE_LIT: { - out = instr->lit(inputs[0]); - } - break; - case TGSI_OPCODE_RCP: { - out = instr->rcp(inputs[0]); - } - break; - case TGSI_OPCODE_RSQ: { - out = instr->rsq(inputs[0]); - } - break; - case TGSI_OPCODE_EXP: { - out = instr->exp(inputs[0]); - } - break; - case TGSI_OPCODE_LOG: { - out = instr->log(inputs[0]); - } - break; - case TGSI_OPCODE_MUL: { - out = instr->mul(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_ADD: { - out = instr->add(inputs[0], inputs[1]); - } - break; - case 
TGSI_OPCODE_DP3: { - out = instr->dp3(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP4: { - out = instr->dp4(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DST: { - out = instr->dst(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MIN: { - out = instr->min(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAX: { - out = instr->max(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SLT: { - out = instr->slt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SGE: { - out = instr->sge(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAD: { - out = instr->madd(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SUB: { - out = instr->sub(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_LRP: { - out = instr->lerp(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_CND: { - out = instr->cnd(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_CND0: { - out = instr->cnd0(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_DP2A: { - out = instr->dot2add(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_FRC: { - out = instr->frc(inputs[0]); - } - break; - case TGSI_OPCODE_CLAMP: { - out = instr->clamp(inputs[0]); - } - break; - case TGSI_OPCODE_FLR: { - out = instr->floor(inputs[0]); - } - break; - case TGSI_OPCODE_ROUND: - break; - case TGSI_OPCODE_EX2: { - out = instr->ex2(inputs[0]); - } - break; - case TGSI_OPCODE_LG2: { - out = instr->lg2(inputs[0]); - } - break; - case TGSI_OPCODE_POW: { - out = instr->pow(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_XPD: { - out = instr->cross(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_ABS: { - out = instr->abs(inputs[0]); - } - break; - case TGSI_OPCODE_RCC: - break; - case TGSI_OPCODE_DPH: { - out = instr->dph(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_COS: { - out = instr->cos(inputs[0]); - } - break; - case TGSI_OPCODE_DDX: { - out = instr->ddx(inputs[0]); - } - break; - case 
TGSI_OPCODE_DDY: { - out = instr->ddy(inputs[0]); - } - break; - case TGSI_OPCODE_KILP: - break; - case TGSI_OPCODE_PK2H: - break; - case TGSI_OPCODE_PK2US: - break; - case TGSI_OPCODE_PK4B: - break; - case TGSI_OPCODE_PK4UB: - break; - case TGSI_OPCODE_RFL: - break; - case TGSI_OPCODE_SEQ: { - out = instr->seq(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SFL: { - out = instr->sfl(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SGT: { - out = instr->sgt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SIN: { - out = instr->sin(inputs[0]); - } - break; - case TGSI_OPCODE_SLE: { - out = instr->sle(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SNE: { - out = instr->sne(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_STR: { - out = instr->str(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_TEX: - break; - case TGSI_OPCODE_TXD: - break; - case TGSI_OPCODE_UP2H: - break; - case TGSI_OPCODE_UP2US: - break; - case TGSI_OPCODE_UP4B: - break; - case TGSI_OPCODE_UP4UB: - break; - case TGSI_OPCODE_X2D: { - out = instr->x2d(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_ARA: - break; - case TGSI_OPCODE_ARR: - break; - case TGSI_OPCODE_BRA: - break; - case TGSI_OPCODE_CAL: { - instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr()); - return; - } - break; - case TGSI_OPCODE_RET: { - instr->end(); - return; - } - break; - case TGSI_OPCODE_SSG: - break; - case TGSI_OPCODE_CMP: { - out = instr->cmp(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SCS: { - out = instr->scs(inputs[0]); - } - break; - case TGSI_OPCODE_TXB: - break; - case TGSI_OPCODE_NRM4: - case TGSI_OPCODE_NRM: { - out = instr->nrm(inputs[0]); - } - break; - case TGSI_OPCODE_DIV: { - out = instr->div(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP2: { - out = instr->dp2(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_TXL: - break; - case TGSI_OPCODE_BRK: { - instr->brk(); - return; - } - break; - case 
TGSI_OPCODE_IF: { - instr->ifop(inputs[0]); - storage->setCurrentBlock(instr->currentBlock()); - return; //just update the state - } - break; - case TGSI_OPCODE_BGNFOR: - break; - case TGSI_OPCODE_REP: - break; - case TGSI_OPCODE_ELSE: { - instr->elseop(); - storage->setCurrentBlock(instr->currentBlock()); - return; //only state update - } - break; - case TGSI_OPCODE_ENDIF: { - instr->endif(); - storage->setCurrentBlock(instr->currentBlock()); - return; //just update the state - } - break; - case TGSI_OPCODE_ENDFOR: - break; - case TGSI_OPCODE_ENDREP: - break; - case TGSI_OPCODE_PUSHA: - break; - case TGSI_OPCODE_POPA: - break; - case TGSI_OPCODE_CEIL: - break; - case TGSI_OPCODE_I2F: - break; - case TGSI_OPCODE_NOT: - break; - case TGSI_OPCODE_TRUNC: { - out = instr->trunc(inputs[0]); - } - break; - case TGSI_OPCODE_SHL: - break; - case TGSI_OPCODE_ISHR: - break; - case TGSI_OPCODE_AND: - break; - case TGSI_OPCODE_OR: - break; - case TGSI_OPCODE_MOD: - break; - case TGSI_OPCODE_XOR: - break; - case TGSI_OPCODE_SAD: - break; - case TGSI_OPCODE_TXF: - break; - case TGSI_OPCODE_TXQ: - break; - case TGSI_OPCODE_CONT: - break; - case TGSI_OPCODE_EMIT: - break; - case TGSI_OPCODE_ENDPRIM: - break; - case TGSI_OPCODE_BGNLOOP: { - instr->beginLoop(); - storage->setCurrentBlock(instr->currentBlock()); - return; - } - break; - case TGSI_OPCODE_BGNSUB: { - instr->bgnSub(instno); - storage->setCurrentBlock(instr->currentBlock()); - storage->pushTemps(); - return; - } - break; - case TGSI_OPCODE_ENDLOOP: { - instr->endLoop(); - storage->setCurrentBlock(instr->currentBlock()); - return; - } - break; - case TGSI_OPCODE_ENDSUB: { - instr->endSub(); - storage->setCurrentBlock(instr->currentBlock()); - storage->popArguments(); - storage->popTemps(); - return; - } - break; - case TGSI_OPCODE_NOISE1: - break; - case TGSI_OPCODE_NOISE2: - break; - case TGSI_OPCODE_NOISE3: - break; - case TGSI_OPCODE_NOISE4: - break; - case TGSI_OPCODE_NOP: - break; - case TGSI_OPCODE_CALLNZ: - break; 
- case TGSI_OPCODE_IFC: - break; - case TGSI_OPCODE_BREAKC: - break; - case TGSI_OPCODE_KIL: { - out = instr->kil(inputs[0]); - storage->setKilElement(out); - return; - } - break; - case TGSI_OPCODE_END: - instr->end(); - return; - break; - default: - fprintf(stderr, "ERROR: Unknown opcode %d\n", - inst->Instruction.Opcode); - assert(0); - break; - } - - if (!out) { - fprintf(stderr, "ERROR: unsupported opcode %d\n", - inst->Instruction.Opcode); - assert(!"Unsupported opcode"); - } - - /* # not sure if we need this */ - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - /*TXT( "_SAT" );*/ - break; - case TGSI_SAT_MINUS_PLUS_ONE: - /*TXT( "_SAT[-1,1]" );*/ - break; - default: - assert( 0 ); - } - - /* store results */ - for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->Dst[i]; - - if (dst->Register.File == TGSI_FILE_OUTPUT) { - storage->setOutputElement(dst->Register.Index, out, dst->Register.WriteMask); - } else if (dst->Register.File == TGSI_FILE_TEMPORARY) { - storage->setTempElement(dst->Register.Index, out, dst->Register.WriteMask); - } else if (dst->Register.File == TGSI_FILE_ADDRESS) { - storage->setAddrElement(dst->Register.Index, out, dst->Register.WriteMask); - } else { - fprintf(stderr, "ERROR: unsupported LLVM destination!"); - assert(!"wrong destination"); - } - } -} - - -static void -translate_instructionir(llvm::Module *module, - StorageSoa *storage, - InstructionsSoa *instr, - struct tgsi_full_instruction *inst, - struct tgsi_full_instruction *fi, - unsigned instno) -{ - std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs); - - for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->Src[i]; - std::vector<llvm::Value*> val; - llvm::Value *indIdx = 0; - int swizzle = swizzleInt(src); - - if (src->Register.Indirect) { - indIdx = storage->addrElement(src->Indirect.Index); - } - val = 
storage->load((enum tgsi_file_type)src->Register.File, - src->Register.Index, swizzle, instr->getIRBuilder(), indIdx); - - inputs[i] = val; - } - - std::vector<llvm::Value*> out(4); - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: { - out = instr->arl(inputs[0]); - } - break; - case TGSI_OPCODE_MOV: { - out = inputs[0]; - } - break; - case TGSI_OPCODE_LIT: { - out = instr->lit(inputs[0]); - } - break; - case TGSI_OPCODE_RCP: { - } - break; - case TGSI_OPCODE_RSQ: { - out = instr->rsq(inputs[0]); - } - break; - case TGSI_OPCODE_EXP: - break; - case TGSI_OPCODE_LOG: - break; - case TGSI_OPCODE_MUL: { - out = instr->mul(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_ADD: { - out = instr->add(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP3: { - out = instr->dp3(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP4: { - out = instr->dp4(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DST: { - } - break; - case TGSI_OPCODE_MIN: { - out = instr->min(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAX: { - out = instr->max(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SLT: { - out = instr->slt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SGE: { - } - break; - case TGSI_OPCODE_MAD: { - out = instr->madd(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SUB: { - out = instr->sub(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_LRP: { - } - break; - case TGSI_OPCODE_CND: - break; - case TGSI_OPCODE_CND0: - break; - case TGSI_OPCODE_DP2A: - break; - case TGSI_OPCODE_FRC: { - } - break; - case TGSI_OPCODE_CLAMP: - break; - case TGSI_OPCODE_FLR: { - } - break; - case TGSI_OPCODE_ROUND: - break; - case TGSI_OPCODE_EX2: { - } - break; - case TGSI_OPCODE_LG2: { - } - break; - case TGSI_OPCODE_POW: { - out = instr->pow(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_XPD: { - } - break; - case TGSI_OPCODE_ABS: { - out = instr->abs(inputs[0]); - } - break; - case TGSI_OPCODE_RCC: - 
break; - case TGSI_OPCODE_DPH: { - } - break; - case TGSI_OPCODE_COS: { - } - break; - case TGSI_OPCODE_DDX: - break; - case TGSI_OPCODE_DDY: - break; - case TGSI_OPCODE_KILP: - break; - case TGSI_OPCODE_PK2H: - break; - case TGSI_OPCODE_PK2US: - break; - case TGSI_OPCODE_PK4B: - break; - case TGSI_OPCODE_PK4UB: - break; - case TGSI_OPCODE_RFL: - break; - case TGSI_OPCODE_SEQ: - break; - case TGSI_OPCODE_SFL: - break; - case TGSI_OPCODE_SGT: { - } - break; - case TGSI_OPCODE_SIN: { - } - break; - case TGSI_OPCODE_SLE: - break; - case TGSI_OPCODE_SNE: - break; - case TGSI_OPCODE_STR: - break; - case TGSI_OPCODE_TEX: - break; - case TGSI_OPCODE_TXD: - break; - case TGSI_OPCODE_UP2H: - break; - case TGSI_OPCODE_UP2US: - break; - case TGSI_OPCODE_UP4B: - break; - case TGSI_OPCODE_UP4UB: - break; - case TGSI_OPCODE_X2D: - break; - case TGSI_OPCODE_ARA: - break; - case TGSI_OPCODE_ARR: - break; - case TGSI_OPCODE_BRA: - break; - case TGSI_OPCODE_CAL: { - } - break; - case TGSI_OPCODE_RET: { - } - break; - case TGSI_OPCODE_SSG: - break; - case TGSI_OPCODE_CMP: { - } - break; - case TGSI_OPCODE_SCS: { - } - break; - case TGSI_OPCODE_TXB: - break; - case TGSI_OPCODE_NRM: - break; - case TGSI_OPCODE_DIV: - break; - case TGSI_OPCODE_DP2: - break; - case TGSI_OPCODE_TXL: - break; - case TGSI_OPCODE_BRK: { - } - break; - case TGSI_OPCODE_IF: { - } - break; - case TGSI_OPCODE_BGNFOR: - break; - case TGSI_OPCODE_REP: - break; - case TGSI_OPCODE_ELSE: { - } - break; - case TGSI_OPCODE_ENDIF: { - } - break; - case TGSI_OPCODE_ENDFOR: - break; - case TGSI_OPCODE_ENDREP: - break; - case TGSI_OPCODE_PUSHA: - break; - case TGSI_OPCODE_POPA: - break; - case TGSI_OPCODE_CEIL: - break; - case TGSI_OPCODE_I2F: - break; - case TGSI_OPCODE_NOT: - break; - case TGSI_OPCODE_TRUNC: { - } - break; - case TGSI_OPCODE_SHL: - break; - case TGSI_OPCODE_ISHR: - break; - case TGSI_OPCODE_AND: - break; - case TGSI_OPCODE_OR: - break; - case TGSI_OPCODE_MOD: - break; - case TGSI_OPCODE_XOR: - break; - 
case TGSI_OPCODE_SAD: - break; - case TGSI_OPCODE_TXF: - break; - case TGSI_OPCODE_TXQ: - break; - case TGSI_OPCODE_CONT: - break; - case TGSI_OPCODE_EMIT: - break; - case TGSI_OPCODE_ENDPRIM: - break; - case TGSI_OPCODE_BGNLOOP: { - } - break; - case TGSI_OPCODE_BGNSUB: { - } - break; - case TGSI_OPCODE_ENDLOOP: { - } - break; - case TGSI_OPCODE_ENDSUB: { - } - break; - case TGSI_OPCODE_NOISE1: - break; - case TGSI_OPCODE_NOISE2: - break; - case TGSI_OPCODE_NOISE3: - break; - case TGSI_OPCODE_NOISE4: - break; - case TGSI_OPCODE_NOP: - break; - case TGSI_OPCODE_NRM4: - break; - case TGSI_OPCODE_CALLNZ: - break; - case TGSI_OPCODE_IFC: - break; - case TGSI_OPCODE_BREAKC: - break; - case TGSI_OPCODE_KIL: { - } - break; - case TGSI_OPCODE_END: - instr->end(); - return; - break; - default: - fprintf(stderr, "ERROR: Unknown opcode %d\n", - inst->Instruction.Opcode); - assert(0); - break; - } - - if (!out[0]) { - fprintf(stderr, "ERROR: unsupported opcode %d\n", - inst->Instruction.Opcode); - assert(!"Unsupported opcode"); - } - - /* store results */ - for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->Dst[i]; - storage->store((enum tgsi_file_type)dst->Register.File, - dst->Register.Index, out, dst->Register.WriteMask, - instr->getIRBuilder() ); - } -} - -llvm::Module * -tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) -{ - llvm::Module *mod = new Module("shader"); - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - unsigned instno = 0; - Function* shader = mod->getFunction("execute_shader"); - std::ostringstream stream; - if (ir->type == GALLIVM_VS) { - stream << "vs_shader"; - } else { - stream << "fs_shader"; - } - stream << ir->id; - std::string func_name = stream.str(); - shader->setName(func_name.c_str()); - - Function::arg_iterator args = shader->arg_begin(); - Value *ptr_INPUT = args++; - ptr_INPUT->setName("input"); - - BasicBlock 
*label_entry = BasicBlock::Create("entry", shader, 0); - - tgsi_parse_init(&parse, tokens); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - Storage storage(label_entry, ptr_INPUT); - Instructions instr(mod, shader, label_entry, &storage); - while(!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - translate_declaration(ir, mod, &storage, - &parse.FullToken.FullDeclaration, - &fd); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - translate_immediate(&storage, - &parse.FullToken.FullImmediate); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - translate_instruction(mod, &storage, &instr, - &parse.FullToken.FullInstruction, - &fi, instno); - ++instno; - break; - - default: - assert(0); - } - } - - tgsi_parse_free(&parse); - - ir->num_consts = storage.numConsts(); - return mod; -} - -llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, - const struct tgsi_token *tokens) -{ - llvm::Module *mod = new Module("shader"); - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - unsigned instno = 0; - std::ostringstream stream; - if (ir->type == GALLIVM_VS) { - stream << "vs_shader"; - } else { - stream << "fs_shader"; - } - //stream << ir->id; - std::string func_name = stream.str(); - Function *shader = llvm::cast<Function>(mod->getOrInsertFunction( - func_name.c_str(), - vertexShaderFunctionType())); - - Function::arg_iterator args = shader->arg_begin(); - Value *input = args++; - input->setName("inputs"); - Value *output = args++; - output->setName("outputs"); - Value *consts = args++; - consts->setName("consts"); - - BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); - - tgsi_parse_init(&parse, tokens); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - - StorageSoa storage(label_entry, input, output, consts); - InstructionsSoa instr(mod, shader, 
label_entry, &storage); - - while(!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - translate_declarationir(ir, mod, &storage, - &parse.FullToken.FullDeclaration, - &fd); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - translate_immediateir(&storage, - &parse.FullToken.FullImmediate); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - storage.declareImmediates(); - translate_instructionir(mod, &storage, &instr, - &parse.FullToken.FullInstruction, - &fi, instno); - ++instno; - break; - - default: - assert(0); - } - } - - tgsi_parse_free(&parse); - - return mod; -} diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.h b/src/gallium/auxiliary/gallivm/tgsitollvm.h deleted file mode 100644 index 7ada04d6299..00000000000 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef TGSITOLLVM_H -#define TGSITOLLVM_H - - -namespace llvm { - class Module; -} - -struct gallivm_ir; -struct tgsi_token; - - -llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir, - const struct tgsi_token *tokens); - - -llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, - const struct tgsi_token *tokens); - -#endif diff --git a/src/gallium/auxiliary/os/os_memory.h b/src/gallium/auxiliary/os/os_memory.h new file mode 100644 index 00000000000..556662d35e1 --- /dev/null +++ b/src/gallium/auxiliary/os/os_memory.h @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/* + * OS memory management abstractions + */ + + +#ifndef _OS_MEMORY_H_ +#define _OS_MEMORY_H_ + + +#include "pipe/p_config.h" +#include "pipe/p_compiler.h" + + +#if defined(PIPE_OS_EMBEDDED) + +#ifdef __cplusplus +extern "C" { +#endif + +void * +os_malloc(size_t size); + +void * +os_calloc(size_t count, size_t size); + +void +os_free(void *ptr); + +void * +os_realloc(void *ptr, size_t old_size, size_t new_size); + +void * +os_malloc_aligned(size_t size, size_t alignment); + +void +os_free_aligned(void *ptr); + +#ifdef __cplusplus +} +#endif + +#elif defined(PIPE_OS_WINDOWS) && defined(DEBUG) && !defined(DEBUG_MEMORY_IMPLEMENTATION) + +# include "os_memory_debug.h" + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + +# include "os_memory_win32k.h" + +#else + +# include "os_memory_stdc.h" + +#endif + +#endif /* _OS_MEMORY_H_ */ diff --git a/src/gallium/auxiliary/os/os_memory_aligned.h b/src/gallium/auxiliary/os/os_memory_aligned.h new file mode 100644 index 00000000000..72c5cf65b66 --- /dev/null +++ b/src/gallium/auxiliary/os/os_memory_aligned.h @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * Memory alignment wrappers. + */ + + +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. Include os_memory.h instead" +#endif + + +#include "pipe/p_compiler.h" + + +/** + * Return memory on given byte alignment + */ +static INLINE void * +os_malloc_aligned(size_t size, size_t alignment) +{ + char *ptr, *buf; + + ptr = (char *) os_malloc(size + alignment + sizeof(void *)); + if (!ptr) + return NULL; + + buf = (char *)(((uintptr_t)ptr + sizeof(void *) + alignment - 1) & ~((uintptr_t)(alignment - 1))); + *(char **)(buf - sizeof(void *)) = ptr; + + return buf; +} + + +/** + * Free memory returned by align_malloc(). + */ +static INLINE void +os_free_aligned(void *ptr) +{ + if (ptr) { + void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); + void *realAddr = *cubbyHole; + os_free(realAddr); + } +} diff --git a/src/gallium/auxiliary/os/os_memory_debug.h b/src/gallium/auxiliary/os/os_memory_debug.h new file mode 100644 index 00000000000..c664be9aad5 --- /dev/null +++ b/src/gallium/auxiliary/os/os_memory_debug.h @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * Debugging wrappers for OS memory management abstractions. + */ + + +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. 
Include os_memory.h instead" +#endif + + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +void * +debug_malloc(const char *file, unsigned line, const char *function, + size_t size); + +void * +debug_calloc(const char *file, unsigned line, const char *function, + size_t count, size_t size ); + +void +debug_free(const char *file, unsigned line, const char *function, + void *ptr); + +void * +debug_realloc(const char *file, unsigned line, const char *function, + void *old_ptr, size_t old_size, size_t new_size ); + + +#ifdef __cplusplus +} +#endif + + +#ifndef DEBUG_MEMORY_IMPLEMENTATION + +#define os_malloc( _size ) \ + debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) +#define os_calloc( _count, _size ) \ + debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) +#define os_free( _ptr ) \ + debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) +#define os_realloc( _ptr, _old_size, _new_size ) \ + debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _new_size ) + +/* TODO: wrap os_malloc_aligned() and os_free_aligned() too */ +#include "os_memory_aligned.h" + +#endif /* !DEBUG_MEMORY_IMPLEMENTATION */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/auxiliary/os/os_memory_stdc.h index 161bab37991..806e5363568 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h +++ b/src/gallium/auxiliary/os/os_memory_stdc.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,54 +18,59 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ -#ifndef LP_TILE_CACHE_H -#define LP_TILE_CACHE_H +/* + * OS memory management abstractions for the standard C library. + */ -#include "pipe/p_compiler.h" -#include "lp_tile_soa.h" +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. Include os_memory.h instead" +#endif + +#include <stdlib.h> -struct llvmpipe_tile_cache; /* opaque */ +#include "pipe/p_compiler.h" -extern struct llvmpipe_tile_cache * -lp_create_tile_cache( struct pipe_screen *screen ); +#define os_malloc(_size) malloc(_size) +#define os_calloc(_count, _size ) calloc(_count, _size ) +#define os_free(_ptr) free(_ptr) -extern void -lp_destroy_tile_cache(struct llvmpipe_tile_cache *tc); +#define os_realloc( _old_ptr, _old_size, _new_size) \ + realloc(_old_ptr, _new_size + 0*(_old_size)) -extern void -lp_tile_cache_set_surface(struct llvmpipe_tile_cache *tc, - struct pipe_surface *lps); -extern struct pipe_surface * -lp_tile_cache_get_surface(struct llvmpipe_tile_cache *tc); +#if defined(HAVE_POSIX_MEMALIGN) -extern void -lp_tile_cache_map_transfers(struct llvmpipe_tile_cache *tc); +static INLINE void * +os_malloc_aligned(size_t size, size_t alignment) +{ + void *ptr; + alignment = (alignment + sizeof(void*) - 1) & ~(sizeof(void*) - 1); + if(posix_memalign(&ptr, alignment, size) != 0) + return NULL; + return ptr; +} -extern void -lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc); +#define os_free_aligned(_ptr) free(_ptr) -extern void -lp_flush_tile_cache(struct llvmpipe_tile_cache *tc); +#elif defined(PIPE_OS_WINDOWS) -extern void -lp_tile_cache_clear(struct 
llvmpipe_tile_cache *tc, const float *rgba, - uint clearValue); +#include <malloc.h> -extern void * -lp_get_cached_tile(struct llvmpipe_tile_cache *tc, - unsigned x, unsigned y ); +#define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align) +#define os_free_aligned(_ptr) _aligned_free(_ptr) +#else -#endif /* LP_TILE_CACHE_H */ +#include "os_memory_aligned.h" +#endif diff --git a/src/gallium/auxiliary/os/os_memory_win32k.h b/src/gallium/auxiliary/os/os_memory_win32k.h new file mode 100644 index 00000000000..d56d6908722 --- /dev/null +++ b/src/gallium/auxiliary/os/os_memory_win32k.h @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/* + * OS memory management abstractions for Windows kernel. + */ + + +#ifndef _OS_MEMORY_H_ +#error "Must not be included directly. Include os_memory.h instead" +#endif + + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void * __stdcall +EngAllocMem(unsigned long Flags, + unsigned long MemSize, + unsigned long Tag); + +void __stdcall +EngFreeMem(void *Mem); + +#define os_malloc(_size) EngAllocMem(0, _size, 'D3AG') +#define os_calloc(_count, _size) EngAllocMem(1, (_count)*(_size), 'D3AG') +#define _os_free(_ptr) EngFreeMem(_ptr) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + +void * +ExAllocatePool(unsigned long PoolType, + size_t NumberOfBytes); + +void +ExFreePool(void *P); + +#define os_malloc(_size) ExAllocatePool(0, _size) +#define _os_free(_ptr) ExFreePool(_ptr) + +static INLINE void * +os_calloc(unsigned count, unsigned size) +{ + void *ptr = os_malloc(count * size); + if (ptr) { + memset(ptr, 0, count * size); + } + return ptr; +} + +#else + +#error "Unsupported subsystem" + +#endif + + +static INLINE void +os_free( void *ptr ) +{ + if (ptr) { + _os_free(ptr); + } +} + + +static INLINE void * +os_realloc(void *old_ptr, unsigned old_size, unsigned new_size) +{ + void *new_ptr = NULL; + + if (new_size != 0) { + unsigned copy_size = old_size < new_size ? 
old_size : new_size; + new_ptr = os_malloc( new_size ); + if (new_ptr && old_ptr && copy_size) { + memcpy(new_ptr, old_ptr, copy_size); + } + } + + os_free(old_ptr); + + return new_ptr; +} + + +#ifdef __cplusplus +} +#endif + + +#include "os_memory_aligned.h" diff --git a/src/gallium/auxiliary/os/os_misc.c b/src/gallium/auxiliary/os/os_misc.c new file mode 100644 index 00000000000..384988017b7 --- /dev/null +++ b/src/gallium/auxiliary/os/os_misc.c @@ -0,0 +1,188 @@ +/************************************************************************** + * + * Copyright 2008-2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "os_misc.h" + +#include <stdarg.h> + + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY + +#include <windows.h> +#include <winddi.h> + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +#include <stdio.h> +#include <stdlib.h> +#include <windows.h> +#include <types.h> + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers +#endif +#include <windows.h> +#include <stdio.h> + +#else + +#include <stdio.h> +#include <stdlib.h> + +#endif + + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY +static INLINE void +_EngDebugPrint(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + EngDebugPrint("", (PCHAR)format, ap); + va_end(ap); +} +#endif + + +void +os_log_message(const char *message) +{ +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + _EngDebugPrint("%s", message); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + OutputDebugStringA(message); + if(GetConsoleWindow() && !IsDebuggerPresent()) { + fflush(stdout); + fputs(message, stderr); + fflush(stderr); + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + wchar_t *wide_format; + long wide_str_len; + /* Format is ascii - needs to be converted to wchar_t for printing */ + wide_str_len = MultiByteToWideChar(CP_ACP, 0, message, -1, NULL, 0); + wide_format = (wchar_t *) malloc((wide_str_len+1) * sizeof(wchar_t)); + if (wide_format) { + MultiByteToWideChar(CP_ACP, 0, message, -1, + wide_format, wide_str_len); + NKDbgPrintfW(wide_format, wide_format); + free(wide_format); + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* TODO */ +#else /* !PIPE_SUBSYSTEM_WINDOWS */ + fflush(stdout); + fputs(message, stderr); +#endif +} + + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY +static const char * +find(const char *start, const char *end, char c) +{ + const char *p; + for(p = start; !end || p != end; ++p) { + if(*p == c) + return p; + if(*p < 32) + break; + } + 
return NULL; +} + +static int +compare(const char *start, const char *end, const char *s) +{ + const char *p, *q; + for(p = start, q = s; p != end && *q != '\0'; ++p, ++q) { + if(*p != *q) + return 0; + } + return p == end && *q == '\0'; +} + +static void +copy(char *dst, const char *start, const char *end, size_t n) +{ + const char *p; + char *q; + for(p = start, q = dst, n = n - 1; p != end && n; ++p, ++q, --n) + *q = *p; + *q = '\0'; +} +#endif + + +const char * +os_get_option(const char *name) +{ +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + /* EngMapFile creates the file if it does not exist, so it must either be + * disabled on release versions (or put in a less conspicuous place). */ +#ifdef DEBUG + const char *result = NULL; + ULONG_PTR iFile = 0; + const void *pMap = NULL; + const char *sol, *eol, *sep; + static char output[1024]; + + pMap = EngMapFile(L"\\??\\c:\\gallium.cfg", 0, &iFile); + if(pMap) { + sol = (const char *)pMap; + while(1) { + /* TODO: handle LF line endings */ + eol = find(sol, NULL, '\r'); + if(!eol || eol == sol) + break; + sep = find(sol, eol, '='); + if(!sep) + break; + if(compare(sol, sep, name)) { + copy(output, sep + 1, eol, sizeof(output)); + result = output; + break; + } + sol = eol + 2; + } + EngUnmapFile(iFile); + } + return result; +#else + return NULL; +#endif +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* TODO: implement */ + return NULL; +#else + return getenv(name); +#endif +} + diff --git a/src/gallium/auxiliary/os/os_misc.h b/src/gallium/auxiliary/os/os_misc.h new file mode 100644 index 00000000000..d59f9819fec --- /dev/null +++ b/src/gallium/auxiliary/os/os_misc.h @@ -0,0 +1,99 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* + * Miscellaneous OS services. + */ + + +#ifndef _OS_MISC_H_ +#define _OS_MISC_H_ + + +#include "pipe/p_compiler.h" + + +#if defined(PIPE_OS_UNIX) +# include <signal.h> /* for kill() */ +# include <unistd.h> /* getpid() */ +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Trap into the debugger. + */ +#if (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) && defined(PIPE_CC_GCC) +# define os_break() __asm("int3") +#elif defined(PIPE_CC_MSVC) +# define os_break() __debugbreak() +#elif defined(PIPE_OS_UNIX) +# define os_break() kill(getpid(), SIGTRAP) +#elif defined(PIPE_OS_EMBEDDED) +void os_break(void); +#else +# define os_break() abort() +#endif + + +/* + * Abort the program. 
+ */ +#if defined(DEBUG) || defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +# define os_abort() os_break() +#elif defined(PIPE_OS_EMBEDDED) +void os_abort(void); +#else +# define os_abort() abort() +#endif + + +/* + * Output a message. Message should preferably end in a newline. + */ +void +os_log_message(const char *message); + + +/* + * Get an option. Should return NULL if specified option is not set. + */ +const char * +os_get_option(const char *name); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _OS_MISC_H_ */ diff --git a/src/gallium/auxiliary/util/u_stream.h b/src/gallium/auxiliary/os/os_stream.h index a9d0f0121a6..bf30e6542d3 100644 --- a/src/gallium/auxiliary/util/u_stream.h +++ b/src/gallium/auxiliary/os/os_stream.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -30,14 +30,14 @@ * Cross-platform sequential access stream abstraction. 
*/ -#ifndef U_STREAM_H -#define U_STREAM_H +#ifndef _OS_STREAM_H_ +#define _OS_STREAM_H_ #include "pipe/p_compiler.h" -struct util_stream; +struct os_stream; /** @@ -45,17 +45,17 @@ struct util_stream; * @param filename relative or absolute path (necessary for windows) * @param optional maximum file size (0 for a growable size). */ -struct util_stream * -util_stream_create(const char *filename, size_t max_size); +struct os_stream * +os_stream_create(const char *filename, size_t max_size); boolean -util_stream_write(struct util_stream *stream, const void *data, size_t size); +os_stream_write(struct os_stream *stream, const void *data, size_t size); void -util_stream_flush(struct util_stream *stream); +os_stream_flush(struct os_stream *stream); void -util_stream_close(struct util_stream *stream); +os_stream_close(struct os_stream *stream); -#endif /* U_STREAM_H */ +#endif /* _OS_STREAM_H_ */ diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/os/os_stream_stdc.c index 4d976d6dca4..caa60c0b50f 100644 --- a/src/gallium/auxiliary/util/u_stream_stdc.c +++ b/src/gallium/auxiliary/os/os_stream_stdc.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
@@ -32,47 +32,46 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_UNIX) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) +#include <stdlib.h> #include <stdio.h> -#include "util/u_memory.h" +#include "os_stream.h" -#include "u_stream.h" - -struct util_stream +struct os_stream { FILE *file; }; -struct util_stream * -util_stream_create(const char *filename, size_t max_size) +struct os_stream * +os_stream_create(const char *filename, size_t max_size) { - struct util_stream *stream; + struct os_stream *stream; (void)max_size; - stream = CALLOC_STRUCT(util_stream); + stream = (struct os_stream *)calloc(1, sizeof(struct os_stream)); if(!stream) - goto error1; + goto no_stream; stream->file = fopen(filename, "w"); if(!stream->file) - goto error2; + goto no_file; return stream; -error2: - FREE(stream); -error1: +no_file: + free(stream); +no_stream: return NULL; } boolean -util_stream_write(struct util_stream *stream, const void *data, size_t size) +os_stream_write(struct os_stream *stream, const void *data, size_t size) { if(!stream) return FALSE; @@ -82,7 +81,7 @@ util_stream_write(struct util_stream *stream, const void *data, size_t size) void -util_stream_flush(struct util_stream *stream) +os_stream_flush(struct os_stream *stream) { if(!stream) return; @@ -92,14 +91,14 @@ util_stream_flush(struct util_stream *stream) void -util_stream_close(struct util_stream *stream) +os_stream_close(struct os_stream *stream) { if(!stream) return; fclose(stream->file); - FREE(stream); + free(stream); } diff --git a/src/gallium/auxiliary/util/u_stream_wd.c b/src/gallium/auxiliary/os/os_stream_wd.c index 864489e7755..a64cbcab4cf 100644 --- a/src/gallium/auxiliary/util/u_stream_wd.c +++ b/src/gallium/auxiliary/os/os_stream_wd.c @@ -1,6 +1,6 @@ /************************************************************************** * - * 
Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -37,16 +37,14 @@ #include <windows.h> #include <winddi.h> -#include "util/u_memory.h" -#include "util/u_string.h" - -#include "u_stream.h" +#include "os_memory.h" +#include "os_stream.h" #define MAP_FILE_SIZE (4*1024*1024) -struct util_stream +struct os_stream { char filename[MAX_PATH + 1]; WCHAR wFileName[MAX_PATH + 1]; @@ -60,23 +58,23 @@ struct util_stream static INLINE boolean -util_stream_map(struct util_stream *stream) +os_stream_map(struct os_stream *stream) { ULONG BytesInUnicodeString; static char filename[MAX_PATH + 1]; unsigned filename_len; if(stream->growable) - filename_len = util_snprintf(filename, - sizeof(filename), - "%s.%04x", - stream->filename, - stream->suffix++); + filename_len = snprintf(filename, + sizeof(filename), + "%s.%04x", + stream->filename, + stream->suffix++); else - filename_len = util_snprintf(filename, - sizeof(filename), - "%s", - stream->filename); + filename_len = snprintf(filename, + sizeof(filename), + "%s", + stream->filename); EngMultiByteToUnicodeN( stream->wFileName, @@ -97,7 +95,7 @@ util_stream_map(struct util_stream *stream) static INLINE void -util_stream_unmap(struct util_stream *stream) +os_stream_unmap(struct os_stream *stream) { EngUnmapFile(stream->iFile); 
if(stream->written < stream->map_size) { @@ -112,7 +110,7 @@ util_stream_unmap(struct util_stream *stream) static INLINE void -util_stream_full_qualified_filename(char *dst, size_t size, const char *src) +os_stream_full_qualified_filename(char *dst, size_t size, const char *src) { boolean need_drive, need_root; @@ -125,24 +123,24 @@ util_stream_full_qualified_filename(char *dst, size_t size, const char *src) need_root = src[0] == '\\' ? FALSE : TRUE; } - util_snprintf(dst, size, - "\\??\\%s%s%s", - need_drive ? "C:" : "", - need_root ? "\\" : "", - src); + snprintf(dst, size, + "\\??\\%s%s%s", + need_drive ? "C:" : "", + need_root ? "\\" : "", + src); } -struct util_stream * -util_stream_create(const char *filename, size_t max_size) +struct os_stream * +os_stream_create(const char *filename, size_t max_size) { - struct util_stream *stream; + struct os_stream *stream; - stream = CALLOC_STRUCT(util_stream); + stream = CALLOC_STRUCT(os_stream); if(!stream) goto error1; - util_stream_full_qualified_filename(stream->filename, + os_stream_full_qualified_filename(stream->filename, sizeof(stream->filename), filename); @@ -155,7 +153,7 @@ util_stream_create(const char *filename, size_t max_size) stream->map_size = MAP_FILE_SIZE; } - if(!util_stream_map(stream)) + if(!os_stream_map(stream)) goto error2; return stream; @@ -168,7 +166,7 @@ error1: static INLINE void -util_stream_copy(struct util_stream *stream, const char *data, size_t size) +os_stream_copy(struct os_stream *stream, const char *data, size_t size) { assert(stream->written + size <= stream->map_size); memcpy(stream->pMap + stream->written, data, size); @@ -177,7 +175,7 @@ util_stream_copy(struct util_stream *stream, const char *data, size_t size) boolean -util_stream_write(struct util_stream *stream, const void *data, size_t size) +os_stream_write(struct os_stream *stream, const void *data, size_t size) { if(!stream) return FALSE; @@ -187,35 +185,35 @@ util_stream_write(struct util_stream *stream, const void 
*data, size_t size) while(stream->written + size > stream->map_size) { size_t step = stream->map_size - stream->written; - util_stream_copy(stream, data, step); + os_stream_copy(stream, data, step); data = (const char *)data + step; size -= step; - util_stream_unmap(stream); - if(!stream->growable || !util_stream_map(stream)) + os_stream_unmap(stream); + if(!stream->growable || !os_stream_map(stream)) return FALSE; } - util_stream_copy(stream, data, size); + os_stream_copy(stream, data, size); return TRUE; } void -util_stream_flush(struct util_stream *stream) +os_stream_flush(struct os_stream *stream) { (void)stream; } void -util_stream_close(struct util_stream *stream) +os_stream_close(struct os_stream *stream) { if(!stream) return; - util_stream_unmap(stream); + os_stream_unmap(stream); FREE(stream); } diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/auxiliary/os/os_thread.h index 25e41482325..8ae90308c53 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -27,12 +27,13 @@ /** * @file * - * Thread, mutex, condition var and thread-specific data functions. + * Thread, mutex, condition variable, barrier, semaphore and + * thread-specific data functions. 
*/ -#ifndef _P_THREAD2_H_ -#define _P_THREAD2_H_ +#ifndef OS_THREAD_H_ +#define OS_THREAD_H_ #include "pipe/p_compiler.h" @@ -46,6 +47,8 @@ #define PIPE_THREAD_HAVE_CONDVAR +/* pipe_thread + */ typedef pthread_t pipe_thread; #define PIPE_THREAD_ROUTINE( name, param ) \ @@ -69,8 +72,10 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) return pthread_detach( thread ); } + +/* pipe_mutex + */ typedef pthread_mutex_t pipe_mutex; -typedef pthread_cond_t pipe_condvar; #define pipe_static_mutex(mutex) \ static pipe_mutex mutex = PTHREAD_MUTEX_INITIALIZER @@ -87,6 +92,11 @@ typedef pthread_cond_t pipe_condvar; #define pipe_mutex_unlock(mutex) \ (void) pthread_mutex_unlock(&(mutex)) + +/* pipe_condvar + */ +typedef pthread_cond_t pipe_condvar; + #define pipe_static_condvar(mutex) \ static pipe_condvar mutex = PTHREAD_COND_INITIALIZER @@ -106,10 +116,32 @@ typedef pthread_cond_t pipe_condvar; pthread_cond_broadcast(&(cond)) +/* pipe_barrier + */ +typedef pthread_barrier_t pipe_barrier; + +static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) +{ + pthread_barrier_init(barrier, NULL, count); +} + +static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) +{ + pthread_barrier_destroy(barrier); +} + +static INLINE void pipe_barrier_wait(pipe_barrier *barrier) +{ + pthread_barrier_wait(barrier); +} + + #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) #include <windows.h> +/* pipe_thread + */ typedef HANDLE pipe_thread; #define PIPE_THREAD_ROUTINE( name, param ) \ @@ -135,6 +167,9 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) return -1; } + +/* pipe_mutex + */ typedef CRITICAL_SECTION pipe_mutex; #define pipe_static_mutex(mutex) \ @@ -152,23 +187,74 @@ typedef CRITICAL_SECTION pipe_mutex; #define pipe_mutex_unlock(mutex) \ LeaveCriticalSection(&mutex) -/* XXX: dummy definitions, make it compile */ +/* pipe_condvar (XXX FIX THIS) + */ typedef unsigned pipe_condvar; -#define pipe_condvar_init(condvar) \ - (void) condvar +#define 
pipe_condvar_init(cond) \ + (void) cond + +#define pipe_condvar_destroy(cond) \ + (void) cond + +#define pipe_condvar_wait(cond, mutex) \ + (void) cond; (void) mutex + +#define pipe_condvar_signal(cond) \ + (void) cond + +#define pipe_condvar_broadcast(cond) \ + (void) cond + + +/* pipe_barrier (XXX FIX THIS) + */ +typedef unsigned pipe_barrier; + +static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) +{ + /* XXX we could implement barriers with a mutex and condition var */ +} + +static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) +{ +} + +static INLINE void pipe_barrier_wait(pipe_barrier *barrier) +{ + assert(0); +} + -#define pipe_condvar_broadcast(condvar) \ - (void) condvar #else /** Dummy definitions */ typedef unsigned pipe_thread; + +#define PIPE_THREAD_ROUTINE( name, param ) \ + void * name( void *param ) + +static INLINE pipe_thread pipe_thread_create( void *(* routine)( void *), void *param ) +{ + return 0; +} + +static INLINE int pipe_thread_wait( pipe_thread thread ) +{ + return -1; +} + +static INLINE int pipe_thread_destroy( pipe_thread thread ) +{ + return -1; +} + typedef unsigned pipe_mutex; typedef unsigned pipe_condvar; +typedef unsigned pipe_barrier; #define pipe_static_mutex(mutex) \ static pipe_mutex mutex = 0 @@ -204,9 +290,77 @@ typedef unsigned pipe_condvar; (void) condvar +static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) +{ + /* XXX we could implement barriers with a mutex and condition var */ + assert(0); +} + +static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) +{ + assert(0); +} + +static INLINE void pipe_barrier_wait(pipe_barrier *barrier) +{ + assert(0); +} + + + #endif /* PIPE_OS_? 
*/ +/* + * Semaphores + */ + +typedef struct +{ + pipe_mutex mutex; + pipe_condvar cond; + int counter; +} pipe_semaphore; + + +static INLINE void +pipe_semaphore_init(pipe_semaphore *sema, int init_val) +{ + pipe_mutex_init(sema->mutex); + pipe_condvar_init(sema->cond); + sema->counter = init_val; +} + +static INLINE void +pipe_semaphore_destroy(pipe_semaphore *sema) +{ + pipe_mutex_destroy(sema->mutex); + pipe_condvar_destroy(sema->cond); +} + +/** Signal/increment semaphore counter */ +static INLINE void +pipe_semaphore_signal(pipe_semaphore *sema) +{ + pipe_mutex_lock(sema->mutex); + sema->counter++; + pipe_condvar_signal(sema->cond); + pipe_mutex_unlock(sema->mutex); +} + +/** Wait for semaphore counter to be greater than zero */ +static INLINE void +pipe_semaphore_wait(pipe_semaphore *sema) +{ + pipe_mutex_lock(sema->mutex); + while (sema->counter <= 0) { + pipe_condvar_wait(sema->cond, sema->mutex); + } + sema->counter--; + pipe_mutex_unlock(sema->mutex); +} + + /* * Thread-specific data. @@ -276,4 +430,4 @@ pipe_tsd_set(pipe_tsd *tsd, void *value) -#endif /* _P_THREAD2_H_ */ +#endif /* OS_THREAD_H_ */ diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c new file mode 100644 index 00000000000..6259142bec0 --- /dev/null +++ b/src/gallium/auxiliary/os/os_time.c @@ -0,0 +1,128 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. 
+ * + * @author Jose Fonseca <[email protected]> + */ + + +#include "pipe/p_config.h" + +#if !defined(PIPE_OS_EMBEDDED) + +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) +# include <sys/time.h> /* timeval */ +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) +# include <windows.h> +# include <winddi.h> +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +# include <windows.h> +extern VOID KeQuerySystemTime(PLARGE_INTEGER); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) +# include <windows.h> +#else +# error Unsupported OS +#endif + +#include "os_time.h" + + +int64_t +os_time_get(void) +{ +#if defined(PIPE_OS_UNIX) + + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_usec + tv.tv_sec*1000000LL; + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + + static LONGLONG frequency; + LONGLONG counter; + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + EngQueryPerformanceCounter(&counter); + return counter*INT64_C(1000000)/frequency; + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + + static LARGE_INTEGER frequency; + LARGE_INTEGER counter; + if(!frequency.QuadPart) + QueryPerformanceFrequency(&frequency); + QueryPerformanceCounter(&counter); + return counter.QuadPart*INT64_C(1000000)/frequency.QuadPart; + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + + /* Updated every 10 milliseconds, measured in units of 100 nanoseconds. 
+ * http://msdn.microsoft.com/en-us/library/ms801642.aspx */ + LARGE_INTEGER counter; + KeQuerySystemTime(&counter); + return counter.QuadPart/10; + +#endif +} + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void +os_time_sleep(int64_t usecs) +{ + static LONGLONG frequency; + LONGLONG start, curr, end; + + EngQueryPerformanceCounter(&start); + + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + + end = start + (usecs * frequency + 999999LL)/1000000LL; + + do { + EngQueryPerformanceCounter(&curr); + } while(start <= curr && curr < end || + end < start && (curr < end || start <= curr)); +} + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + +void +os_time_sleep(int64_t usecs) +{ + Sleep((usecs + 999) / 1000); +} + +#endif + + +#endif /* !PIPE_OS_EMBEDDED */ diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h new file mode 100644 index 00000000000..5b55c1b3747 --- /dev/null +++ b/src/gallium/auxiliary/os/os_time.h @@ -0,0 +1,92 @@ +/************************************************************************** + * + * Copyright 2008-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca <[email protected]> + */ + +#ifndef _OS_TIME_H_ +#define _OS_TIME_H_ + + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_UNIX) +# include <unistd.h> /* usleep */ +#endif + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Get the current time in microseconds from an unknown base. + */ +int64_t +os_time_get(void); + + +/* + * Sleep. + */ +#if defined(PIPE_OS_UNIX) +#define os_time_sleep(_usecs) usleep(_usecs) +#else +void +os_time_sleep(int64_t usecs); +#endif + + +/* + * Helper function for detecting time outs, taking in account overflow. + * + * Returns true the the current time has elapsed beyond the specified interval. + */ +static INLINE boolean +os_time_timeout(int64_t start, + int64_t end, + int64_t curr) +{ + if(start <= end) + return !(start <= curr && curr < end); + else + return !((start <= curr) || (curr < end)); +} + + +#ifdef __cplusplus +} +#endif + +#endif /* _OS_TIME_H_ */ diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile new file mode 100644 index 00000000000..21d25d24748 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -0,0 +1,18 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = pipebuffer + +C_SOURCES = \ + pb_buffer_fenced.c \ + pb_buffer_malloc.c \ + pb_bufmgr_alt.c \ + pb_bufmgr_cache.c \ + pb_bufmgr_debug.c \ + pb_bufmgr_mm.c \ + pb_bufmgr_ondemand.c \ + pb_bufmgr_pool.c \ + pb_bufmgr_slab.c \ + pb_validate.c + +include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript new file mode 100644 index 00000000000..a074a554717 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -0,0 +1,18 @@ +Import('*') + +pipebuffer = env.ConvenienceLibrary( + target = 'pipebuffer', + source = [ + 'pb_buffer_fenced.c', + 'pb_buffer_malloc.c', + 'pb_bufmgr_alt.c', + 'pb_bufmgr_cache.c', + 'pb_bufmgr_debug.c', + 'pb_bufmgr_mm.c', + 'pb_bufmgr_ondemand.c', + 'pb_bufmgr_pool.c', + 'pb_bufmgr_slab.c', + 'pb_validate.c', + ]) + +auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index eb7e84be848..34b1b77df40 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -46,6 +46,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index ba6f7b15f9e..95eb5f65635 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007-2009 VMware, Inc. + * Copyright 2007-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -28,9 +28,9 @@ /** * \file * Implementation of fenced buffers. 
- * - * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> - * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * + * \author Jose Fonseca <jfonseca-at-vmware-dot-com> + * \author Thomas Hellström <thellstrom-at-vmware-dot-com> */ @@ -44,12 +44,13 @@ #include "pipe/p_compiler.h" #include "pipe/p_defines.h" #include "util/u_debug.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" +#include "pb_bufmgr.h" @@ -59,23 +60,50 @@ #define SUPER(__derived) (&(__derived)->base) -struct fenced_buffer_list +struct fenced_manager { - pipe_mutex mutex; - + struct pb_manager base; + struct pb_manager *provider; struct pb_fence_ops *ops; - - pb_size numDelayed; - struct list_head delayed; - -#ifdef DEBUG - pb_size numUnfenced; + + /** + * Maximum buffer size that can be safely allocated. + */ + pb_size max_buffer_size; + + /** + * Maximum cpu memory we can allocate before we start waiting for the + * GPU to idle. + */ + pb_size max_cpu_total_size; + + /** + * Following members are mutable and protected by this mutex. + */ + pipe_mutex mutex; + + /** + * Fenced buffer list. + * + * All fenced buffers are placed in this listed, ordered from the oldest + * fence to the newest fence. + */ + struct list_head fenced; + pb_size num_fenced; + struct list_head unfenced; -#endif + pb_size num_unfenced; + + /** + * How much temporary CPU memory is being used to hold unvalidated buffers. + */ + pb_size cpu_total_size; }; /** + * Fenced buffer. + * * Wrapper around a pipe buffer which adds fencing and reference counting. */ struct fenced_buffer @@ -85,22 +113,26 @@ struct fenced_buffer */ struct pb_buffer base; - struct pb_buffer *buffer; - struct fenced_buffer_list *list; + struct fenced_manager *mgr; - /** - * Protected by fenced_buffer_list::mutex + /* + * Following members are mutable and protected by fenced_manager::mutex. 
*/ + struct list_head head; /** - * Following members are mutable and protected by this mutex. - * - * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex - * held, but in order to prevent deadlocks you must never lock - * fenced_buffer_list::mutex with this mutex held. + * Buffer with storage. */ - pipe_mutex mutex; + struct pb_buffer *buffer; + pb_size size; + struct pb_desc desc; + + /** + * Temporary CPU storage data. Used when there isn't enough GPU memory to + * store the buffer. + */ + void *data; /** * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current @@ -109,12 +141,22 @@ struct fenced_buffer unsigned flags; unsigned mapcount; + struct pb_validate *vl; unsigned validation_flags; + struct pipe_fence_handle *fence; }; +static INLINE struct fenced_manager * +fenced_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct fenced_manager *)mgr; +} + + static INLINE struct fenced_buffer * fenced_buffer(struct pb_buffer *buf) { @@ -123,81 +165,172 @@ fenced_buffer(struct pb_buffer *buf) } +static void +fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf); + +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + boolean wait); + +static enum pipe_error +fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf); + + +/** + * Dump the fenced buffer list. + * + * Useful to understand failures to allocate buffers. 
+ */ +static void +fenced_manager_dump_locked(struct fenced_manager *fenced_mgr) +{ +#ifdef DEBUG + struct pb_fence_ops *ops = fenced_mgr->ops; + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + + debug_printf("%10s %7s %8s %7s %10s %s\n", + "buffer", "size", "refcount", "storage", "fence", "signalled"); + + curr = fenced_mgr->unfenced.next; + next = curr->next; + while(curr != &fenced_mgr->unfenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(!fenced_buf->fence); + debug_printf("%10p %7u %8u %7s\n", + (void *) fenced_buf, + fenced_buf->base.base.size, + p_atomic_read(&fenced_buf->base.base.reference.count), + fenced_buf->buffer ? "gpu" : (fenced_buf->data ? "cpu" : "none")); + curr = next; + next = curr->next; + } + + curr = fenced_mgr->fenced.next; + next = curr->next; + while(curr != &fenced_mgr->fenced) { + int signaled; + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(fenced_buf->buffer); + signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); + debug_printf("%10p %7u %8u %7s %10p %s\n", + (void *) fenced_buf, + fenced_buf->base.base.size, + p_atomic_read(&fenced_buf->base.base.reference.count), + "gpu", + (void *) fenced_buf->fence, + signaled == 0 ? "y" : "n"); + curr = next; + next = curr->next; + } +#else + (void)fenced_mgr; +#endif +} + + +static INLINE void +fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + + assert(!fenced_buf->fence); + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + + FREE(fenced_buf); +} + + /** * Add the buffer to the fenced list. 
- * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order, before calling this function. - * + * * Reference count should be incremented before calling this function. */ static INLINE void -fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, +fenced_buffer_add_locked(struct fenced_manager *fenced_mgr, struct fenced_buffer *fenced_buf) { assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); - /* TODO: Move the reference count increment here */ - -#ifdef DEBUG + p_atomic_inc(&fenced_buf->base.base.reference.count); + LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; -#endif - LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); - ++fenced_list->numDelayed; + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->fenced); + ++fenced_mgr->num_fenced; } /** - * Remove the buffer from the fenced list. - * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order before calling this function. - * - * Reference count should be decremented after calling this function. + * Remove the buffer from the fenced list, and potentially destroy the buffer + * if the reference count reaches zero. + * + * Returns TRUE if the buffer was detroyed. 
*/ -static INLINE void -fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, +static INLINE boolean +fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr, struct fenced_buffer *fenced_buf) { - struct pb_fence_ops *ops = fenced_list->ops; + struct pb_fence_ops *ops = fenced_mgr->ops; assert(fenced_buf->fence); - assert(fenced_buf->list == fenced_list); - + assert(fenced_buf->mgr == fenced_mgr); + ops->fence_reference(ops, &fenced_buf->fence, NULL); fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - + assert(fenced_buf->head.prev); assert(fenced_buf->head.next); - + LIST_DEL(&fenced_buf->head); - assert(fenced_list->numDelayed); - --fenced_list->numDelayed; - -#ifdef DEBUG - LIST_ADDTAIL(&fenced_buf->head, &fenced_list->unfenced); - ++fenced_list->numUnfenced; -#endif - - /* TODO: Move the reference count decrement and destruction here */ + assert(fenced_mgr->num_fenced); + --fenced_mgr->num_fenced; + + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + + if (p_atomic_dec_zero(&fenced_buf->base.base.reference.count)) { + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); + return TRUE; + } + + return FALSE; } /** * Wait for the fence to expire, and remove it from the fenced list. - * - * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be - * held -- it will be acquired internally. + * + * This function will release and re-aquire the mutex, so any copy of mutable + * state must be discarded after calling it. 
*/ static INLINE enum pipe_error -fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) { - struct pb_fence_ops *ops = fenced_list->ops; + struct pb_fence_ops *ops = fenced_mgr->ops; enum pipe_error ret = PIPE_ERROR; #if 0 @@ -207,22 +340,42 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); - /* - * Acquire the global lock. Must release buffer mutex first to preserve - * lock order. - */ - pipe_mutex_unlock(fenced_buf->mutex); - pipe_mutex_lock(fenced_list->mutex); - pipe_mutex_lock(fenced_buf->mutex); - if(fenced_buf->fence) { - if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) { - /* Remove from the fenced list */ - /* TODO: remove consequents */ - fenced_buffer_remove_locked(fenced_list, fenced_buf); + struct pipe_fence_handle *fence = NULL; + int finished; + boolean proceed; + + ops->fence_reference(ops, &fence, fenced_buf->fence); + + pipe_mutex_unlock(fenced_mgr->mutex); + + finished = ops->fence_finish(ops, fenced_buf->fence, 0); + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + + /* + * Only proceed if the fence object didn't change in the meanwhile. + * Otherwise assume the work has been already carried out by another + * thread that re-aquired the lock before us. + */ + proceed = fence == fenced_buf->fence ? TRUE : FALSE; + + ops->fence_reference(ops, &fence, NULL); - p_atomic_dec(&fenced_buf->base.base.reference.count); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + if(proceed && finished == 0) { + /* + * Remove from the fenced list + */ + + boolean destroyed; + + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + + /* TODO: remove consequents buffers with the same fence? 
*/ + + assert(!destroyed); fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; @@ -230,131 +383,350 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, } } - pipe_mutex_unlock(fenced_list->mutex); - return ret; } /** - * Free as many fenced buffers from the list head as possible. + * Remove as many fenced buffers from the fenced list as possible. + * + * Returns TRUE if at least one buffer was removed. */ -static void -fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, - int wait) +static boolean +fenced_manager_check_signalled_locked(struct fenced_manager *fenced_mgr, + boolean wait) { - struct pb_fence_ops *ops = fenced_list->ops; + struct pb_fence_ops *ops = fenced_mgr->ops; struct list_head *curr, *next; struct fenced_buffer *fenced_buf; - struct pb_buffer *pb_buf; struct pipe_fence_handle *prev_fence = NULL; + boolean ret = FALSE; - curr = fenced_list->delayed.next; + curr = fenced_mgr->fenced.next; next = curr->next; - while(curr != &fenced_list->delayed) { + while(curr != &fenced_mgr->fenced) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); - if(fenced_buf->fence != prev_fence) { int signaled; - if (wait) + + if (wait) { signaled = ops->fence_finish(ops, fenced_buf->fence, 0); - else + + /* + * Don't return just now. Instead preemptively check if the + * following buffers' fences already expired, without further waits. + */ + wait = FALSE; + } + else { signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); + } + if (signaled != 0) { - pipe_mutex_unlock(fenced_buf->mutex); - break; + return ret; } + prev_fence = fenced_buf->fence; } else { + /* This buffer's fence object is identical to the previous buffer's + * fence object, so no need to check the fence again. 
+ */ assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } - fenced_buffer_remove_locked(fenced_list, fenced_buf); - pipe_mutex_unlock(fenced_buf->mutex); + fenced_buffer_remove_locked(fenced_mgr, fenced_buf); - pb_buf = &fenced_buf->base; - pb_reference(&pb_buf, NULL); + ret = TRUE; - curr = next; + curr = next; next = curr->next; } + + return ret; +} + + +/** + * Try to free some GPU memory by backing it up into CPU memory. + * + * Returns TRUE if at least one buffer was freed. + */ +static boolean +fenced_manager_free_gpu_storage_locked(struct fenced_manager *fenced_mgr) +{ + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + + curr = fenced_mgr->unfenced.next; + next = curr->next; + while(curr != &fenced_mgr->unfenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + + /* + * We can only move storage if the buffer is not mapped and not + * validated. + */ + if(fenced_buf->buffer && + !fenced_buf->mapcount && + !fenced_buf->vl) { + enum pipe_error ret; + + ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf); + if(ret == PIPE_OK) { + ret = fenced_buffer_copy_storage_to_cpu_locked(fenced_buf); + if(ret == PIPE_OK) { + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + return TRUE; + } + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + } + } + + curr = next; + next = curr->next; + } + + return FALSE; +} + + +/** + * Destroy CPU storage for this buffer. + */ +static void +fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf) +{ + if(fenced_buf->data) { + align_free(fenced_buf->data); + fenced_buf->data = NULL; + assert(fenced_buf->mgr->cpu_total_size >= fenced_buf->size); + fenced_buf->mgr->cpu_total_size -= fenced_buf->size; + } +} + + +/** + * Create CPU storage for this buffer. 
+ */ +static enum pipe_error +fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(!fenced_buf->data); + if(fenced_buf->data) + return PIPE_OK; + + if (fenced_mgr->cpu_total_size + fenced_buf->size > fenced_mgr->max_cpu_total_size) + return PIPE_ERROR_OUT_OF_MEMORY; + + fenced_buf->data = align_malloc(fenced_buf->size, fenced_buf->desc.alignment); + if(!fenced_buf->data) + return PIPE_ERROR_OUT_OF_MEMORY; + + fenced_mgr->cpu_total_size += fenced_buf->size; + + return PIPE_OK; +} + + +/** + * Destroy the GPU storage. + */ +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf) +{ + if(fenced_buf->buffer) { + pb_reference(&fenced_buf->buffer, NULL); + } +} + + +/** + * Try to create GPU storage for this buffer. + * + * This function is a shorthand around pb_manager::create_buffer for + * fenced_buffer_create_gpu_storage_locked()'s benefit. + */ +static INLINE boolean +fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + struct pb_manager *provider = fenced_mgr->provider; + + assert(!fenced_buf->buffer); + + fenced_buf->buffer = provider->create_buffer(fenced_mgr->provider, + fenced_buf->size, + &fenced_buf->desc); + return fenced_buf->buffer ? TRUE : FALSE; +} + + +/** + * Create GPU storage for this buffer. + */ +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + boolean wait) +{ + assert(!fenced_buf->buffer); + + /* + * Check for signaled buffers before trying to allocate. + */ + fenced_manager_check_signalled_locked(fenced_mgr, FALSE); + + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + + /* + * Keep trying while there is some sort of progress: + * - fences are expiring, + * - or buffers are being being swapped out from GPU memory into CPU memory. 
+ */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, FALSE) || + fenced_manager_free_gpu_storage_locked(fenced_mgr))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + } + + if(!fenced_buf->buffer && wait) { + /* + * Same as before, but this time around, wait to free buffers if + * necessary. + */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, TRUE) || + fenced_manager_free_gpu_storage_locked(fenced_mgr))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + } + } + + if(!fenced_buf->buffer) { + if(0) + fenced_manager_dump_locked(fenced_mgr); + + /* give up */ + return PIPE_ERROR_OUT_OF_MEMORY; + } + + return PIPE_OK; +} + + +static enum pipe_error +fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf) +{ + uint8_t *map; + + assert(fenced_buf->data); + assert(fenced_buf->buffer); + + map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + if(!map) + return PIPE_ERROR; + + memcpy(map, fenced_buf->data, fenced_buf->size); + + pb_unmap(fenced_buf->buffer); + + return PIPE_OK; +} + + +static enum pipe_error +fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf) +{ + const uint8_t *map; + + assert(fenced_buf->data); + assert(fenced_buf->buffer); + + map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(!map) + return PIPE_ERROR; + + memcpy(fenced_buf->data, map, fenced_buf->size); + + pb_unmap(fenced_buf->buffer); + + return PIPE_OK; } static void fenced_buffer_destroy(struct pb_buffer *buf) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); - struct fenced_buffer_list *fenced_list = fenced_buf->list; + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - assert(!fenced_buf->fence); -#ifdef DEBUG - pipe_mutex_lock(fenced_list->mutex); - 
assert(fenced_buf->head.prev); - assert(fenced_buf->head.next); - LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; - pipe_mutex_unlock(fenced_list->mutex); -#else - (void)fenced_list; -#endif + pipe_mutex_lock(fenced_mgr->mutex); - pb_reference(&fenced_buf->buffer, NULL); + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); - pipe_mutex_destroy(fenced_buf->mutex); - FREE(fenced_buf); + pipe_mutex_unlock(fenced_mgr->mutex); } static void * -fenced_buffer_map(struct pb_buffer *buf, +fenced_buffer_map(struct pb_buffer *buf, unsigned flags) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - struct fenced_buffer_list *fenced_list = fenced_buf->list; - struct pb_fence_ops *ops = fenced_list->ops; + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; void *map = NULL; - pipe_mutex_lock(fenced_buf->mutex); + pipe_mutex_lock(fenced_mgr->mutex); assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); - - /* Serialize writes */ - if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || - ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { + + /* + * Serialize writes. + */ + while((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || + ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { + + /* + * Don't wait for the GPU to finish accessing it, if blocking is forbidden. + */ if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { - /* Don't wait for the GPU to finish writing */ goto done; } - /* Wait for the GPU to finish writing */ - fenced_buffer_finish_locked(fenced_list, fenced_buf); + if (flags & PIPE_BUFFER_USAGE_UNSYNCHRONIZED) { + break; + } + + /* + * Wait for the GPU to finish accessing. This will release and re-acquire + * the mutex, so all copies of mutable state must be discarded. 
+ */ + fenced_buffer_finish_locked(fenced_mgr, fenced_buf); } -#if 0 - /* Check for CPU write access (read is OK) */ - if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { - /* this is legal -- just for debugging */ - debug_warning("concurrent CPU writes"); + if(fenced_buf->buffer) { + map = pb_map(fenced_buf->buffer, flags); } -#endif - - map = pb_map(fenced_buf->buffer, flags); + else { + assert(fenced_buf->data); + map = fenced_buf->data; + } + if(map) { ++fenced_buf->mapcount; fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; } done: - pipe_mutex_unlock(fenced_buf->mutex); - + pipe_mutex_unlock(fenced_mgr->mutex); + return map; } @@ -363,18 +735,20 @@ static void fenced_buffer_unmap(struct pb_buffer *buf) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - - pipe_mutex_lock(fenced_buf->mutex); - + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + assert(fenced_buf->mapcount); if(fenced_buf->mapcount) { - pb_unmap(fenced_buf->buffer); + if (fenced_buf->buffer) + pb_unmap(fenced_buf->buffer); --fenced_buf->mapcount; if(!fenced_buf->mapcount) fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } - - pipe_mutex_unlock(fenced_buf->mutex); + + pipe_mutex_unlock(fenced_mgr->mutex); } @@ -384,9 +758,10 @@ fenced_buffer_validate(struct pb_buffer *buf, unsigned flags) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; enum pipe_error ret; - - pipe_mutex_lock(fenced_buf->mutex); + + pipe_mutex_lock(fenced_mgr->mutex); if(!vl) { /* invalidate */ @@ -395,28 +770,16 @@ fenced_buffer_validate(struct pb_buffer *buf, ret = PIPE_OK; goto done; } - + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; - /* Buffer cannot be validated in two different lists */ + /* Buffer cannot be validated in two different lists */ if(fenced_buf->vl && fenced_buf->vl != 
vl) { ret = PIPE_ERROR_RETRY; goto done; } - -#if 0 - /* Do not validate if buffer is still mapped */ - if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { - /* TODO: wait for the thread that mapped the buffer to unmap it */ - ret = PIPE_ERROR_RETRY; - goto done; - } - /* Final sanity checking */ - assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); - assert(!fenced_buf->mapcount); -#endif if(fenced_buf->vl == vl && (fenced_buf->validation_flags & flags) == flags) { @@ -424,16 +787,41 @@ fenced_buffer_validate(struct pb_buffer *buf, ret = PIPE_OK; goto done; } - + + /* + * Create and update GPU storage. + */ + if(!fenced_buf->buffer) { + assert(!fenced_buf->mapcount); + + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE); + if(ret != PIPE_OK) { + goto done; + } + + ret = fenced_buffer_copy_storage_to_gpu_locked(fenced_buf); + if(ret != PIPE_OK) { + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + goto done; + } + + if(fenced_buf->mapcount) { + debug_printf("warning: validating a buffer while it is still mapped\n"); + } + else { + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + } + } + ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) goto done; - + fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; - + done: - pipe_mutex_unlock(fenced_buf->mutex); + pipe_mutex_unlock(fenced_mgr->mutex); return ret; } @@ -443,43 +831,37 @@ static void fenced_buffer_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) { - struct fenced_buffer *fenced_buf; - struct fenced_buffer_list *fenced_list; - struct pb_fence_ops *ops; - - fenced_buf = fenced_buffer(buf); - fenced_list = fenced_buf->list; - ops = fenced_list->ops; + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; - pipe_mutex_lock(fenced_list->mutex); - pipe_mutex_lock(fenced_buf->mutex); + pipe_mutex_lock(fenced_mgr->mutex); 
assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(fenced_buf->buffer); if(fence != fenced_buf->fence) { assert(fenced_buf->vl); assert(fenced_buf->validation_flags); - + if (fenced_buf->fence) { - fenced_buffer_remove_locked(fenced_list, fenced_buf); - p_atomic_dec(&fenced_buf->base.base.reference.count); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + boolean destroyed; + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + assert(!destroyed); } if (fence) { ops->fence_reference(ops, &fenced_buf->fence, fence); fenced_buf->flags |= fenced_buf->validation_flags; - p_atomic_inc(&fenced_buf->base.base.reference.count); - fenced_buffer_add_locked(fenced_list, fenced_buf); + fenced_buffer_add_locked(fenced_mgr, fenced_buf); } pb_fence(fenced_buf->buffer, fence); - + fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; } - pipe_mutex_unlock(fenced_buf->mutex); - pipe_mutex_unlock(fenced_list->mutex); + pipe_mutex_unlock(fenced_mgr->mutex); } @@ -489,12 +871,29 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, pb_size *offset) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - /* NOTE: accesses immutable members only -- mutex not necessary */ - pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + + /* + * This should only be called when the buffer is validated. Typically + * when processing relocations. 
+ */ + assert(fenced_buf->vl); + assert(fenced_buf->buffer); + + if(fenced_buf->buffer) + pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); + else { + *base_buf = buf; + *offset = 0; + } + + pipe_mutex_unlock(fenced_mgr->mutex); } -static const struct pb_vtbl +static const struct pb_vtbl fenced_buffer_vtbl = { fenced_buffer_destroy, fenced_buffer_map, @@ -505,154 +904,166 @@ fenced_buffer_vtbl = { }; -struct pb_buffer * -fenced_buffer_create(struct fenced_buffer_list *fenced_list, - struct pb_buffer *buffer) +/** + * Wrap a buffer in a fenced buffer. + */ +static struct pb_buffer * +fenced_bufmgr_create_buffer(struct pb_manager *mgr, + pb_size size, + const struct pb_desc *desc) { - struct fenced_buffer *buf; - - if(!buffer) - return NULL; - - buf = CALLOC_STRUCT(fenced_buffer); - if(!buf) { - pb_reference(&buffer, NULL); - return NULL; + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + struct fenced_buffer *fenced_buf; + enum pipe_error ret; + + /* + * Don't stall the GPU, waste time evicting buffers, or waste memory + * trying to create a buffer that will most likely never fit into the + * graphics aperture. 
+ */ + if(size > fenced_mgr->max_buffer_size) { + goto no_buffer; } - - pipe_reference_init(&buf->base.base.reference, 1); - buf->base.base.alignment = buffer->base.alignment; - buf->base.base.usage = buffer->base.usage; - buf->base.base.size = buffer->base.size; - - buf->base.vtbl = &fenced_buffer_vtbl; - buf->buffer = buffer; - buf->list = fenced_list; - - pipe_mutex_init(buf->mutex); -#ifdef DEBUG - pipe_mutex_lock(fenced_list->mutex); - LIST_ADDTAIL(&buf->head, &fenced_list->unfenced); - ++fenced_list->numUnfenced; - pipe_mutex_unlock(fenced_list->mutex); -#endif + fenced_buf = CALLOC_STRUCT(fenced_buffer); + if(!fenced_buf) + goto no_buffer; - return &buf->base; -} + pipe_reference_init(&fenced_buf->base.base.reference, 1); + fenced_buf->base.base.alignment = desc->alignment; + fenced_buf->base.base.usage = desc->usage; + fenced_buf->base.base.size = size; + fenced_buf->size = size; + fenced_buf->desc = *desc; + fenced_buf->base.vtbl = &fenced_buffer_vtbl; + fenced_buf->mgr = fenced_mgr; -struct fenced_buffer_list * -fenced_buffer_list_create(struct pb_fence_ops *ops) -{ - struct fenced_buffer_list *fenced_list; + pipe_mutex_lock(fenced_mgr->mutex); - fenced_list = CALLOC_STRUCT(fenced_buffer_list); - if (!fenced_list) - return NULL; + /* + * Try to create GPU storage without stalling, + */ + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, FALSE); - fenced_list->ops = ops; + /* + * Attempt to use CPU memory to avoid stalling the GPU. + */ + if(ret != PIPE_OK) { + ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf); + } - LIST_INITHEAD(&fenced_list->delayed); - fenced_list->numDelayed = 0; - -#ifdef DEBUG - LIST_INITHEAD(&fenced_list->unfenced); - fenced_list->numUnfenced = 0; -#endif + /* + * Create GPU storage, waiting for some to be available. + */ + if(ret != PIPE_OK) { + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE); + } + + /* + * Give up. 
+ */ + if(ret != PIPE_OK) { + goto no_storage; + } - pipe_mutex_init(fenced_list->mutex); + assert(fenced_buf->buffer || fenced_buf->data); - return fenced_list; -} + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + pipe_mutex_unlock(fenced_mgr->mutex); + return &fenced_buf->base; -void -fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, - int wait) -{ - pipe_mutex_lock(fenced_list->mutex); - fenced_buffer_list_check_free_locked(fenced_list, wait); - pipe_mutex_unlock(fenced_list->mutex); +no_storage: + pipe_mutex_unlock(fenced_mgr->mutex); + FREE(fenced_buf); +no_buffer: + return NULL; } -#ifdef DEBUG -void -fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) +static void +fenced_bufmgr_flush(struct pb_manager *mgr) { - struct pb_fence_ops *ops = fenced_list->ops; - struct list_head *curr, *next; - struct fenced_buffer *fenced_buf; + struct fenced_manager *fenced_mgr = fenced_manager(mgr); - pipe_mutex_lock(fenced_list->mutex); + pipe_mutex_lock(fenced_mgr->mutex); + while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; + pipe_mutex_unlock(fenced_mgr->mutex); - debug_printf("%10s %7s %7s %10s %s\n", - "buffer", "size", "refcount", "fence", "signalled"); - - curr = fenced_list->unfenced.next; - next = curr->next; - while(curr != &fenced_list->unfenced) { - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); - assert(!fenced_buf->fence); - debug_printf("%10p %7u %7u\n", - (void *) fenced_buf, - fenced_buf->base.base.size, - p_atomic_read(&fenced_buf->base.base.reference.count)); - pipe_mutex_unlock(fenced_buf->mutex); - curr = next; - next = curr->next; - } - - curr = fenced_list->delayed.next; - next = curr->next; - while(curr != &fenced_list->delayed) { - int signaled; - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); - signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - 
debug_printf("%10p %7u %7u %10p %s\n", - (void *) fenced_buf, - fenced_buf->base.base.size, - p_atomic_read(&fenced_buf->base.base.reference.count), - (void *) fenced_buf->fence, - signaled == 0 ? "y" : "n"); - pipe_mutex_unlock(fenced_buf->mutex); - curr = next; - next = curr->next; - } - - pipe_mutex_unlock(fenced_list->mutex); + assert(fenced_mgr->provider->flush); + if(fenced_mgr->provider->flush) + fenced_mgr->provider->flush(fenced_mgr->provider); } -#endif -void -fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) +static void +fenced_bufmgr_destroy(struct pb_manager *mgr) { - pipe_mutex_lock(fenced_list->mutex); + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + + pipe_mutex_lock(fenced_mgr->mutex); /* Wait on outstanding fences */ - while (fenced_list->numDelayed) { - pipe_mutex_unlock(fenced_list->mutex); + while (fenced_mgr->num_fenced) { + pipe_mutex_unlock(fenced_mgr->mutex); #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) sched_yield(); #endif - pipe_mutex_lock(fenced_list->mutex); - fenced_buffer_list_check_free_locked(fenced_list, 1); + pipe_mutex_lock(fenced_mgr->mutex); + while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; } #ifdef DEBUG - /*assert(!fenced_list->numUnfenced);*/ + /*assert(!fenced_mgr->num_unfenced);*/ #endif - - pipe_mutex_unlock(fenced_list->mutex); - pipe_mutex_destroy(fenced_list->mutex); - - fenced_list->ops->destroy(fenced_list->ops); - - FREE(fenced_list); + + pipe_mutex_unlock(fenced_mgr->mutex); + pipe_mutex_destroy(fenced_mgr->mutex); + + if(fenced_mgr->provider) + fenced_mgr->provider->destroy(fenced_mgr->provider); + + fenced_mgr->ops->destroy(fenced_mgr->ops); + + FREE(fenced_mgr); } +struct pb_manager * +fenced_bufmgr_create(struct pb_manager *provider, + struct pb_fence_ops *ops, + pb_size max_buffer_size, + pb_size max_cpu_total_size) +{ + struct fenced_manager *fenced_mgr; + + if(!provider) + return NULL; + + fenced_mgr = 
CALLOC_STRUCT(fenced_manager); + if (!fenced_mgr) + return NULL; + + fenced_mgr->base.destroy = fenced_bufmgr_destroy; + fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; + fenced_mgr->base.flush = fenced_bufmgr_flush; + + fenced_mgr->provider = provider; + fenced_mgr->ops = ops; + fenced_mgr->max_buffer_size = max_buffer_size; + fenced_mgr->max_cpu_total_size = max_cpu_total_size; + + LIST_INITHEAD(&fenced_mgr->fenced); + fenced_mgr->num_fenced = 0; + + LIST_INITHEAD(&fenced_mgr->unfenced); + fenced_mgr->num_unfenced = 0; + + pipe_mutex_init(fenced_mgr->mutex); + + return &fenced_mgr->base; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index 034ca1e024a..0372f81d0a1 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -98,43 +98,6 @@ struct pb_fence_ops }; -/** - * Create a fenced buffer list. - * - * See also fenced_bufmgr_create for a more convenient way to use this. - */ -struct fenced_buffer_list * -fenced_buffer_list_create(struct pb_fence_ops *ops); - - -/** - * Walk the fenced buffer list to check and free signalled buffers. - */ -void -fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, - int wait); - - -#ifdef DEBUG -void -fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list); -#endif - - -void -fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list); - - -/** - * Wrap a buffer in a fenced buffer. - * - * NOTE: this will not increase the buffer reference count. 
- */ -struct pb_buffer * -fenced_buffer_create(struct fenced_buffer_list *fenced, - struct pb_buffer *buffer); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 8c8d7130781..06669917ff6 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -175,7 +175,9 @@ struct pb_fence_ops; */ struct pb_manager * fenced_bufmgr_create(struct pb_manager *provider, - struct pb_fence_ops *ops); + struct pb_fence_ops *ops, + pb_size max_buffer_size, + pb_size max_cpu_total_size); struct pb_manager * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 7b34c8e3578..c1498318dfb 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -36,7 +36,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 6e3214ca9c9..93f8960641f 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_double_list.h" @@ -371,6 +371,9 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr, struct pb_desc real_desc; pb_size real_size; + assert(size); + assert(desc->alignment); + buf = CALLOC_STRUCT(pb_debug_buffer); if(!buf) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c deleted file mode 100644 index 97dd1427fda..00000000000 
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ /dev/null @@ -1,152 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ - -/** - * \file - * A buffer manager that wraps buffers in fenced buffers. 
- * - * \author Jose Fonseca <[email protected]> - */ - - -#include "util/u_debug.h" -#include "util/u_memory.h" - -#include "pb_buffer.h" -#include "pb_buffer_fenced.h" -#include "pb_bufmgr.h" - - -struct fenced_pb_manager -{ - struct pb_manager base; - - struct pb_manager *provider; - - struct fenced_buffer_list *fenced_list; -}; - - -static INLINE struct fenced_pb_manager * -fenced_pb_manager(struct pb_manager *mgr) -{ - assert(mgr); - return (struct fenced_pb_manager *)mgr; -} - - -static struct pb_buffer * -fenced_bufmgr_create_buffer(struct pb_manager *mgr, - pb_size size, - const struct pb_desc *desc) -{ - struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); - struct pb_buffer *buf; - struct pb_buffer *fenced_buf; - - /* check for free buffers before allocating new ones */ - fenced_buffer_list_check_free(fenced_mgr->fenced_list, 0); - - buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc); - if(!buf) { - /* try harder to get a buffer */ - fenced_buffer_list_check_free(fenced_mgr->fenced_list, 1); - - buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc); - if(!buf) { -#if 0 - fenced_buffer_list_dump(fenced_mgr->fenced_list); -#endif - - /* give up */ - return NULL; - } - } - - fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf); - if(!fenced_buf) { - pb_reference(&buf, NULL); - } - - return fenced_buf; -} - - -static void -fenced_bufmgr_flush(struct pb_manager *mgr) -{ - struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); - - fenced_buffer_list_check_free(fenced_mgr->fenced_list, TRUE); - - assert(fenced_mgr->provider->flush); - if(fenced_mgr->provider->flush) - fenced_mgr->provider->flush(fenced_mgr->provider); -} - - -static void -fenced_bufmgr_destroy(struct pb_manager *mgr) -{ - struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); - - fenced_buffer_list_destroy(fenced_mgr->fenced_list); - - if(fenced_mgr->provider) - 
fenced_mgr->provider->destroy(fenced_mgr->provider); - - FREE(fenced_mgr); -} - - -struct pb_manager * -fenced_bufmgr_create(struct pb_manager *provider, - struct pb_fence_ops *ops) -{ - struct fenced_pb_manager *fenced_mgr; - - if(!provider) - return NULL; - - fenced_mgr = CALLOC_STRUCT(fenced_pb_manager); - if (!fenced_mgr) - return NULL; - - fenced_mgr->base.destroy = fenced_bufmgr_destroy; - fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; - fenced_mgr->base.flush = fenced_bufmgr_flush; - - fenced_mgr->provider = provider; - fenced_mgr->fenced_list = fenced_buffer_list_create(ops); - if(!fenced_mgr->fenced_list) { - FREE(fenced_mgr); - return NULL; - } - - return &fenced_mgr->base; -} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 6400fc5b0a3..63195715d68 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -35,7 +35,7 @@ #include "pipe/p_defines.h" #include "util/u_debug.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_mm.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 7fd65ed2261..fea234ae8c7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -37,7 +37,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_double_list.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index d21910d0bf0..c445cb578b0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -38,7 +38,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" 
-#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_double_list.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index ce40c0cf0e6..903afc749d3 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -39,7 +39,6 @@ #include "util/u_debug.h" #include "pb_buffer.h" -#include "pb_buffer_fenced.h" #include "pb_validate.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index ffed768f979..65d5ce795be 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -32,7 +32,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "util/u_memory.h" #include "rtasm_execmem.h" @@ -58,7 +58,7 @@ #include <unistd.h> #include <sys/mman.h> -#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "util/u_mm.h" #define EXEC_HEAP_SIZE (10*1024*1024) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 1acf3c373eb..f675427d987 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -673,6 +673,13 @@ void x86_and( struct x86_function *p, emit_op_modrm( p, 0x23, 0x21, dst, src ); } +void x86_div( struct x86_function *p, + struct x86_reg src ) +{ + assert(src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); +} + /*********************************************************************** diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 731a6517968..f7612d416a0 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -244,6 +244,7 @@ void x86_sub( struct 
x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_sahf( struct x86_function *p ); +void x86_div( struct x86_function *p, struct x86_reg src ); void x86_cdecl_caller_push_regs( struct x86_function *p ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index de9cbc86305..0890078cd05 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -103,10 +103,11 @@ tgsi_default_declaration( void ) declaration.File = TGSI_FILE_NULL; declaration.UsageMask = TGSI_WRITEMASK_XYZW; declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; + declaration.Dimension = 0; declaration.Semantic = 0; declaration.Centroid = 0; declaration.Invariant = 0; - declaration.Padding = 0; + declaration.CylindricalWrap = 0; return declaration; } @@ -116,9 +117,11 @@ tgsi_build_declaration( unsigned file, unsigned usage_mask, unsigned interpolate, + unsigned dimension, unsigned semantic, unsigned centroid, unsigned invariant, + unsigned cylindrical_wrap, struct tgsi_header *header ) { struct tgsi_declaration declaration; @@ -130,9 +133,11 @@ tgsi_build_declaration( declaration.File = file; declaration.UsageMask = usage_mask; declaration.Interpolate = interpolate; + declaration.Dimension = dimension; declaration.Semantic = semantic; declaration.Centroid = centroid; declaration.Invariant = invariant; + declaration.CylindricalWrap = cylindrical_wrap; header_bodysize_grow( header ); @@ -183,9 +188,11 @@ tgsi_build_full_declaration( full_decl->Declaration.File, full_decl->Declaration.UsageMask, full_decl->Declaration.Interpolate, + full_decl->Declaration.Dimension, full_decl->Declaration.Semantic, full_decl->Declaration.Centroid, full_decl->Declaration.Invariant, + full_decl->Declaration.CylindricalWrap, header ); if (maxsize <= size) @@ -199,6 
+206,20 @@ tgsi_build_full_declaration( declaration, header ); + if (full_decl->Declaration.Dimension) { + struct tgsi_declaration_dimension *dd; + + if (maxsize <= size) { + return 0; + } + dd = (struct tgsi_declaration_dimension *)&tokens[size]; + size++; + + *dd = tgsi_build_declaration_dimension(full_decl->Dim.Index2D, + declaration, + header); + } + if( full_decl->Declaration.Semantic ) { struct tgsi_declaration_semantic *ds; @@ -249,6 +270,34 @@ tgsi_build_declaration_range( return declaration_range; } +struct tgsi_declaration_dimension +tgsi_default_declaration_dimension(void) +{ + struct tgsi_declaration_dimension dd; + + dd.Index2D = 0; + dd.Padding = 0; + + return dd; +} + +struct tgsi_declaration_dimension +tgsi_build_declaration_dimension(unsigned index_2d, + struct tgsi_declaration *declaration, + struct tgsi_header *header) +{ + struct tgsi_declaration_dimension dd; + + assert(index_2d <= 0xFFFF); + + dd = tgsi_default_declaration_dimension(); + dd.Index2D = index_2d; + + declaration_grow(declaration, header); + + return dd; +} + struct tgsi_declaration_semantic tgsi_default_declaration_semantic( void ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 9de2757fe40..13d7f5272d6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -64,9 +64,11 @@ tgsi_build_declaration( unsigned file, unsigned usage_mask, unsigned interpolate, + unsigned dimension, unsigned semantic, unsigned centroid, unsigned invariant, + unsigned cylindrical_wrap, struct tgsi_header *header ); struct tgsi_full_declaration @@ -89,6 +91,14 @@ tgsi_build_declaration_range( struct tgsi_declaration *declaration, struct tgsi_header *header ); +struct tgsi_declaration_dimension +tgsi_default_declaration_dimension(void); + +struct tgsi_declaration_dimension +tgsi_build_declaration_dimension(unsigned index_2d, + struct tgsi_declaration *declaration, + struct tgsi_header *header); + struct 
tgsi_declaration_semantic tgsi_default_declaration_semantic( void ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index e2e5394f86f..57031419f8e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -123,7 +123,8 @@ static const char *semantic_names[] = "NORMAL", "FACE", "EDGEFLAG", - "PRIM_ID" + "PRIM_ID", + "INSTANCEID" }; static const char *immediate_type_names[] = @@ -158,7 +159,9 @@ static const char *property_names[] = { "GS_INPUT_PRIMITIVE", "GS_OUTPUT_PRIMITIVE", - "GS_MAX_OUTPUT_VERTICES" + "GS_MAX_OUTPUT_VERTICES", + "FS_COORD_ORIGIN", + "FS_COORD_PIXEL_CENTER" }; static const char *primitive_names[] = @@ -175,29 +178,18 @@ static const char *primitive_names[] = "POLYGON" }; +static const char *fs_coord_origin_names[] = +{ + "UPPER_LEFT", + "LOWER_LEFT" +}; -static void -_dump_register_decl( - struct dump_ctx *ctx, - uint file, - int first, - int last ) +static const char *fs_coord_pixel_center_names[] = { - ENM( file, file_names ); + "HALF_INTEGER", + "INTEGER" +}; - /* all geometry shader inputs are two dimensional */ - if (file == TGSI_FILE_INPUT && - ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) - TXT("[]"); - - CHR( '[' ); - SID( first ); - if (first != last) { - TXT( ".." 
); - SID( last ); - } - CHR( ']' ); -} static void _dump_register_dst( @@ -218,8 +210,13 @@ _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { + ENM(src->Register.File, file_names); + if (src->Register.Dimension) { + CHR('['); + SID(src->Dimension.Index); + CHR(']'); + } if (src->Register.Indirect) { - ENM( src->Register.File, file_names ); CHR( '[' ); ENM( src->Indirect.File, file_names ); CHR( '[' ); @@ -233,16 +230,10 @@ _dump_register_src( } CHR( ']' ); } else { - ENM( src->Register.File, file_names ); CHR( '[' ); SID( src->Register.Index ); CHR( ']' ); } - if (src->Register.Dimension) { - CHR( '[' ); - SID( src->Dimension.Index ); - CHR( ']' ); - } } static void @@ -299,11 +290,28 @@ iter_declaration( TXT( "DCL " ); - _dump_register_decl( - ctx, - decl->Declaration.File, - decl->Range.First, - decl->Range.Last ); + ENM(decl->Declaration.File, file_names); + + /* all geometry shader inputs are two dimensional */ + if (decl->Declaration.File == TGSI_FILE_INPUT && + iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY) { + TXT("[]"); + } + + if (decl->Declaration.Dimension) { + CHR('['); + SID(decl->Dim.Index2D); + CHR(']'); + } + + CHR('['); + SID(decl->Range.First); + if (decl->Range.First != decl->Range.Last) { + TXT(".."); + SID(decl->Range.Last); + } + CHR(']'); + _dump_writemask( ctx, decl->Declaration.UsageMask ); @@ -334,6 +342,22 @@ iter_declaration( TXT( ", INVARIANT" ); } + if (decl->Declaration.CylindricalWrap) { + TXT(", CYLWRAP_"); + if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_X) { + CHR('X'); + } + if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Y) { + CHR('Y'); + } + if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Z) { + CHR('Z'); + } + if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_W) { + CHR('W'); + } + } + EOL(); return TRUE; @@ -372,6 +396,12 @@ iter_property( case TGSI_PROPERTY_GS_OUTPUT_PRIM: ENM(prop->u[i].Data, primitive_names); break; + case 
TGSI_PROPERTY_FS_COORD_ORIGIN: + ENM(prop->u[i].Data, fs_coord_origin_names); + break; + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + ENM(prop->u[i].Data, fs_coord_pixel_center_names); + break; default: SID( prop->u[i].Data ); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c deleted file mode 100644 index 47fd1dd590e..00000000000 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ /dev/null @@ -1,462 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "util/u_debug.h" -#include "util/u_string.h" -#include "tgsi_dump_c.h" -#include "tgsi_build.h" -#include "tgsi_info.h" -#include "tgsi_parse.h" - -static void -dump_enum( - const unsigned e, - const char **enums, - const unsigned enums_count ) -{ - if (e >= enums_count) { - debug_printf( "%u", e ); - } - else { - debug_printf( "%s", enums[e] ); - } -} - -#define EOL() debug_printf( "\n" ) -#define TXT(S) debug_printf( "%s", S ) -#define CHR(C) debug_printf( "%c", C ) -#define UIX(I) debug_printf( "0x%x", I ) -#define UID(I) debug_printf( "%u", I ) -#define SID(I) debug_printf( "%d", I ) -#define FLT(F) debug_printf( "%10.4f", F ) -#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) - -static const char *TGSI_PROCESSOR_TYPES[] = -{ - "PROCESSOR_FRAGMENT", - "PROCESSOR_VERTEX", - "PROCESSOR_GEOMETRY" -}; - -static const char *TGSI_TOKEN_TYPES[] = -{ - "TOKEN_TYPE_DECLARATION", - "TOKEN_TYPE_IMMEDIATE", - "TOKEN_TYPE_INSTRUCTION" -}; - -static const char *TGSI_FILES[TGSI_FILE_COUNT] = -{ - "FILE_NULL", - "FILE_CONSTANT", - "FILE_INPUT", - "FILE_OUTPUT", - "FILE_TEMPORARY", - "FILE_SAMPLER", - "FILE_ADDRESS", - "FILE_IMMEDIATE", - "FILE_LOOP", - "FILE_PREDICATE" -}; - -static const char *TGSI_INTERPOLATES[] = -{ - "INTERPOLATE_CONSTANT", - "INTERPOLATE_LINEAR", - "INTERPOLATE_PERSPECTIVE" -}; - -static const char *TGSI_SEMANTICS[] = -{ - "SEMANTIC_POSITION", - "SEMANTIC_COLOR", - "SEMANTIC_BCOLOR", - "SEMANTIC_FOG", - "SEMANTIC_PSIZE", - "SEMANTIC_GENERIC", - "SEMANTIC_NORMAL" -}; - -static const char *TGSI_IMMS[] = -{ - "IMM_FLOAT32" -}; - -static const char *TGSI_SATS[] = -{ - "SAT_NONE", - "SAT_ZERO_ONE", - "SAT_MINUS_PLUS_ONE" -}; - -static const char *TGSI_SWIZZLES[] = -{ - "SWIZZLE_X", - "SWIZZLE_Y", - "SWIZZLE_Z", - "SWIZZLE_W" -}; - -static const char *TGSI_TEXTURES[] = -{ - "TEXTURE_UNKNOWN", - "TEXTURE_1D", - "TEXTURE_2D", - "TEXTURE_3D", 
- "TEXTURE_CUBE", - "TEXTURE_RECT", - "TEXTURE_SHADOW1D", - "TEXTURE_SHADOW2D", - "TEXTURE_SHADOWRECT" -}; - -static const char *TGSI_WRITEMASKS[] = -{ - "0", - "WRITEMASK_X", - "WRITEMASK_Y", - "WRITEMASK_XY", - "WRITEMASK_Z", - "WRITEMASK_XZ", - "WRITEMASK_YZ", - "WRITEMASK_XYZ", - "WRITEMASK_W", - "WRITEMASK_XW", - "WRITEMASK_YW", - "WRITEMASK_XYW", - "WRITEMASK_ZW", - "WRITEMASK_XZW", - "WRITEMASK_YZW", - "WRITEMASK_XYZW" -}; - -static void -dump_declaration_verbose( - struct tgsi_full_declaration *decl, - unsigned ignored, - unsigned deflt, - struct tgsi_full_declaration *fd ) -{ - TXT( "\nFile : " ); - ENM( decl->Declaration.File, TGSI_FILES ); - if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { - TXT( "\nUsageMask : " ); - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { - CHR( 'X' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { - CHR( 'Y' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { - CHR( 'Z' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { - CHR( 'W' ); - } - } - if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { - TXT( "\nInterpolate: " ); - ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); - } - if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { - TXT( "\nSemantic : " ); - UID( decl->Declaration.Semantic ); - } - if (deflt || fd->Declaration.Centroid != decl->Declaration.Centroid) { - TXT("\nCentroid : "); - UID(decl->Declaration.Centroid); - } - if (deflt || fd->Declaration.Invariant != decl->Declaration.Invariant) { - TXT("\nInvariant : "); - UID(decl->Declaration.Invariant); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Declaration.Padding ); - } - - EOL(); - TXT( "\nFirst: " ); - UID( decl->Range.First ); - TXT( "\nLast : " ); - UID( decl->Range.Last ); - - if( decl->Declaration.Semantic ) { - EOL(); - TXT( "\nName : " ); - ENM( decl->Semantic.Name, TGSI_SEMANTICS ); - TXT( "\nIndex: " ); - UID( 
decl->Semantic.Index ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Semantic.Padding ); - } - } -} - -static void -dump_immediate_verbose( - struct tgsi_full_immediate *imm, - unsigned ignored ) -{ - unsigned i; - - TXT( "\nDataType : " ); - ENM( imm->Immediate.DataType, TGSI_IMMS ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( imm->Immediate.Padding ); - } - - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) { - EOL(); - switch( imm->Immediate.DataType ) { - case TGSI_IMM_FLOAT32: - TXT( "\nFloat: " ); - FLT( imm->u[i].Float ); - break; - - default: - assert( 0 ); - } - } -} - -static void -dump_instruction_verbose( - struct tgsi_full_instruction *inst, - unsigned ignored, - unsigned deflt, - struct tgsi_full_instruction *fi ) -{ - unsigned i; - - TXT( "\nOpcode : OPCODE_" ); - TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); - if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { - TXT( "\nSaturate : " ); - ENM( inst->Instruction.Saturate, TGSI_SATS ); - } - if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { - TXT( "\nNumDstRegs : " ); - UID( inst->Instruction.NumDstRegs ); - } - if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) { - TXT( "\nNumSrcRegs : " ); - UID( inst->Instruction.NumSrcRegs ); - } - if (deflt || fi->Instruction.Predicate != inst->Instruction.Predicate) { - TXT("\nPredicate : "); - UID(inst->Instruction.Predicate); - } - if (deflt || fi->Instruction.Label != inst->Instruction.Label) { - TXT("\nLabel : "); - UID(inst->Instruction.Label); - } - if (deflt || fi->Instruction.Texture != inst->Instruction.Texture) { - TXT("\nTexture : "); - UID(inst->Instruction.Texture); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->Instruction.Padding ); - } - - if (deflt || inst->Instruction.Label) { - EOL(); - if (deflt || fi->Label.Label != inst->Label.Label) { - TXT( "\nLabel : " ); - 
UID(inst->Label.Label); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX(inst->Label.Padding); - } - } - - if (deflt || inst->Instruction.Texture) { - EOL(); - if (deflt || fi->Texture.Texture != inst->Texture.Texture) { - TXT( "\nTexture : " ); - ENM(inst->Texture.Texture, TGSI_TEXTURES); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX(inst->Texture.Padding); - } - } - - for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - struct tgsi_full_dst_register *dst = &inst->Dst[i]; - struct tgsi_full_dst_register *fd = &fi->Dst[i]; - - EOL(); - TXT( "\nFile : " ); - ENM( dst->Register.File, TGSI_FILES ); - if( deflt || fd->Register.WriteMask != dst->Register.WriteMask ) { - TXT( "\nWriteMask: " ); - ENM( dst->Register.WriteMask, TGSI_WRITEMASKS ); - } - if( ignored ) { - if( deflt || fd->Register.Indirect != dst->Register.Indirect ) { - TXT( "\nIndirect : " ); - UID( dst->Register.Indirect ); - } - if( deflt || fd->Register.Dimension != dst->Register.Dimension ) { - TXT( "\nDimension: " ); - UID( dst->Register.Dimension ); - } - } - if( deflt || fd->Register.Index != dst->Register.Index ) { - TXT( "\nIndex : " ); - SID( dst->Register.Index ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->Register.Padding ); - } - } - - for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - struct tgsi_full_src_register *src = &inst->Src[i]; - struct tgsi_full_src_register *fs = &fi->Src[i]; - - EOL(); - TXT( "\nFile : "); - ENM( src->Register.File, TGSI_FILES ); - if( deflt || fs->Register.SwizzleX != src->Register.SwizzleX ) { - TXT( "\nSwizzleX : " ); - ENM( src->Register.SwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fs->Register.SwizzleY != src->Register.SwizzleY ) { - TXT( "\nSwizzleY : " ); - ENM( src->Register.SwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fs->Register.SwizzleZ != src->Register.SwizzleZ ) { - TXT( "\nSwizzleZ : " ); - ENM( src->Register.SwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fs->Register.SwizzleW != src->Register.SwizzleW ) { - TXT( 
"\nSwizzleW : " ); - ENM( src->Register.SwizzleW, TGSI_SWIZZLES ); - } - if (deflt || fs->Register.Absolute != src->Register.Absolute) { - TXT("\nAbsolute : "); - UID(src->Register.Absolute); - } - if( deflt || fs->Register.Negate != src->Register.Negate ) { - TXT( "\nNegate : " ); - UID( src->Register.Negate ); - } - if( ignored ) { - if( deflt || fs->Register.Indirect != src->Register.Indirect ) { - TXT( "\nIndirect : " ); - UID( src->Register.Indirect ); - } - if( deflt || fs->Register.Dimension != src->Register.Dimension ) { - TXT( "\nDimension: " ); - UID( src->Register.Dimension ); - } - } - if( deflt || fs->Register.Index != src->Register.Index ) { - TXT( "\nIndex : " ); - SID( src->Register.Index ); - } - } -} - -void -tgsi_dump_c( - const struct tgsi_token *tokens, - uint flags ) -{ - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - uint ignored = flags & TGSI_DUMP_C_IGNORED; - uint deflt = flags & TGSI_DUMP_C_DEFAULT; - - tgsi_parse_init( &parse, tokens ); - - TXT( "tgsi-dump begin -----------------" ); - - TXT( "\nHeaderSize: " ); - UID( parse.FullHeader.Header.HeaderSize ); - TXT( "\nBodySize : " ); - UID( parse.FullHeader.Header.BodySize ); - TXT( "\nProcessor : " ); - ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); - EOL(); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - TXT( "\nType : " ); - ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); - if( ignored ) { - TXT( "\nSize : " ); - UID( parse.FullToken.Token.NrTokens ); - } - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - dump_declaration_verbose( - &parse.FullToken.FullDeclaration, - ignored, - deflt, - &fd ); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - dump_immediate_verbose( - &parse.FullToken.FullImmediate, - ignored ); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - 
dump_instruction_verbose( - &parse.FullToken.FullInstruction, - ignored, - deflt, - &fi ); - break; - - default: - assert( 0 ); - } - - EOL(); - } - - TXT( "\ntgsi-dump end -------------------\n" ); - - tgsi_parse_free( &parse ); -} diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 2bcb33392a8..262422364bf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -264,6 +264,12 @@ static void micro_rcp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { +#if 0 /* for debugging */ + assert(src->f[0] != 0.0f); + assert(src->f[1] != 0.0f); + assert(src->f[2] != 0.0f); + assert(src->f[3] != 0.0f); +#endif dst->f[0] = 1.0f / src->f[0]; dst->f[1] = 1.0f / src->f[1]; dst->f[2] = 1.0f / src->f[2]; @@ -284,6 +290,12 @@ static void micro_rsq(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { +#if 0 /* for debugging */ + assert(src->f[0] != 0.0f); + assert(src->f[1] != 0.0f); + assert(src->f[2] != 0.0f); + assert(src->f[3] != 0.0f); +#endif dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); @@ -450,12 +462,20 @@ static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; -#define CHECK_INF_OR_NAN(chan) do {\ - assert(!util_is_inf_or_nan((chan)->f[0]));\ - assert(!util_is_inf_or_nan((chan)->f[1]));\ - assert(!util_is_inf_or_nan((chan)->f[2]));\ - assert(!util_is_inf_or_nan((chan)->f[3]));\ - } while (0) +/** + * Assert that none of the float values in 'chan' are infinite or NaN. + * NaN and Inf may occur normally during program execution and should + * not lead to crashes, etc. But when debugging, it's helpful to catch + * them. 
+ */ +static INLINE void +check_inf_or_nan(const union tgsi_exec_channel *chan) +{ + assert(!util_is_inf_or_nan((chan)->f[0])); + assert(!util_is_inf_or_nan((chan)->f[1])); + assert(!util_is_inf_or_nan((chan)->f[2])); + assert(!util_is_inf_or_nan((chan)->f[3])); +} #ifdef DEBUG @@ -953,99 +973,90 @@ micro_sub( } static void -fetch_src_file_channel( - const struct tgsi_exec_machine *mach, - const uint file, - const uint swizzle, - const union tgsi_exec_channel *index, - union tgsi_exec_channel *chan ) -{ - switch( swizzle ) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - switch( file ) { - case TGSI_FILE_CONSTANT: - assert(mach->Consts); - if (index->i[0] < 0) - chan->f[0] = 0.0f; - else - chan->f[0] = mach->Consts[index->i[0]][swizzle]; - if (index->i[1] < 0) - chan->f[1] = 0.0f; - else - chan->f[1] = mach->Consts[index->i[1]][swizzle]; - if (index->i[2] < 0) - chan->f[2] = 0.0f; - else - chan->f[2] = mach->Consts[index->i[2]][swizzle]; - if (index->i[3] < 0) - chan->f[3] = 0.0f; - else - chan->f[3] = mach->Consts[index->i[3]][swizzle]; - break; +fetch_src_file_channel(const struct tgsi_exec_machine *mach, + const uint file, + const uint swizzle, + const union tgsi_exec_channel *index, + const union tgsi_exec_channel *index2D, + union tgsi_exec_channel *chan) +{ + uint i; - case TGSI_FILE_INPUT: - case TGSI_FILE_SYSTEM_VALUE: - chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; - break; + switch (file) { + case TGSI_FILE_CONSTANT: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); + assert(mach->Consts[index2D->i[i]]); - case TGSI_FILE_TEMPORARY: - assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); - chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = 
mach->Temps[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; - break; + if (index->i[i] < 0) { + chan->u[i] = 0; + } else { + const uint *p = (const uint *)mach->Consts[index2D->i[i]]; - case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - chan->f[0] = mach->Imms[index->i[0]][swizzle]; - assert( index->i[1] < (int) mach->ImmLimit ); - chan->f[1] = mach->Imms[index->i[1]][swizzle]; - assert( index->i[2] < (int) mach->ImmLimit ); - chan->f[2] = mach->Imms[index->i[2]][swizzle]; - assert( index->i[3] < (int) mach->ImmLimit ); - chan->f[3] = mach->Imms[index->i[3]][swizzle]; - break; + chan->u[i] = p[index->i[i] * 4 + swizzle]; + } + } + break; - case TGSI_FILE_ADDRESS: - chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; - break; + case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: + for (i = 0; i < QUAD_SIZE; i++) { + /* XXX: 2D indexing */ + chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i]; + } + break; - case TGSI_FILE_PREDICATE: - assert(index->i[0] < TGSI_EXEC_NUM_PREDS); - assert(index->i[1] < TGSI_EXEC_NUM_PREDS); - assert(index->i[2] < TGSI_EXEC_NUM_PREDS); - assert(index->i[3] < TGSI_EXEC_NUM_PREDS); - chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; - chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; - chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; - chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; - break; + case TGSI_FILE_TEMPORARY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); + assert(index2D->i[i] == 0); - case TGSI_FILE_OUTPUT: - /* vertex/fragment output vars can be read too */ - chan->u[0] = 
mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; - break; + chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; + } + break; - default: - assert( 0 ); + case TGSI_FILE_IMMEDIATE: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); + assert(index2D->i[i] == 0); + + chan->f[i] = mach->Imms[index->i[i]][swizzle]; + } + break; + + case TGSI_FILE_ADDRESS: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0); + assert(index2D->i[i] == 0); + + chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; + } + break; + + case TGSI_FILE_PREDICATE: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); + assert(index2D->i[i] == 0); + + chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; + } + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0); + assert(index2D->i[i] == 0); + + chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; } break; default: - assert( 0 ); + assert(0); + for (i = 0; i < QUAD_SIZE; i++) { + chan->u[i] = 0; + } } } @@ -1057,6 +1068,7 @@ fetch_source(const struct tgsi_exec_machine *mach, enum tgsi_exec_datatype src_datatype) { union tgsi_exec_channel index; + union tgsi_exec_channel index2D; uint swizzle; /* We start with a direct index into a register file. 
@@ -1095,12 +1107,12 @@ fetch_source(const struct tgsi_exec_machine *mach, /* get current value of address register[swizzle] */ swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); - fetch_src_file_channel( - mach, - reg->Indirect.File, - swizzle, - &index2, - &indir_index ); + fetch_src_file_channel(mach, + reg->Indirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); /* add value of address register to the offset */ index.i[0] += indir_index.i[0]; @@ -1121,44 +1133,22 @@ fetch_source(const struct tgsi_exec_machine *mach, * subscript to a register file. Effectively it means that * the register file is actually a 2D array of registers. * - * file[1][3] == file[1*sizeof(file[1])+3], + * file[3][1], * where: * [3] = Dimension.Index */ if (reg->Register.Dimension) { - /* The size of the first-order array depends on the register file type. - * We need to multiply the index to the first array to get an effective, - * "flat" index that points to the beginning of the second-order array. - */ - switch (reg->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_SYSTEM_VALUE: - index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - break; - case TGSI_FILE_CONSTANT: - index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; - break; - default: - assert( 0 ); - } - - index.i[0] += reg->Dimension.Index; - index.i[1] += reg->Dimension.Index; - index.i[2] += reg->Dimension.Index; - index.i[3] += reg->Dimension.Index; + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = reg->Dimension.Index; /* Again, the second subscript index can be addressed indirectly * identically to the first one. 
* Nothing stops us from indirectly addressing the indirect register, * but there is no need for that, so we won't exercise it. * - * file[1][ind[4].y+3], + * file[ind[4].y+3][1], * where: * ind = DimIndirect.File * [4] = DimIndirect.Index @@ -1176,24 +1166,25 @@ fetch_source(const struct tgsi_exec_machine *mach, index2.i[3] = reg->DimIndirect.Index; swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); - fetch_src_file_channel( - mach, - reg->DimIndirect.File, - swizzle, - &index2, - &indir_index ); - - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; + fetch_src_file_channel(mach, + reg->DimIndirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); + + index2D.i[0] += indir_index.i[0]; + index2D.i[1] += indir_index.i[1]; + index2D.i[2] += indir_index.i[2]; + index2D.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. */ for (i = 0; i < QUAD_SIZE; i++) { - if ((execmask & (1 << i)) == 0) - index.i[i] = 0; + if ((execmask & (1 << i)) == 0) { + index2D.i[i] = 0; + } } } @@ -1201,15 +1192,20 @@ fetch_source(const struct tgsi_exec_machine *mach, * files, we would have to check whether Dimension is followed * by a dimension register and continue the saga. 
*/ + } else { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = 0; } swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); - fetch_src_file_channel( - mach, - reg->Register.File, - swizzle, - &index, - chan ); + fetch_src_file_channel(mach, + reg->Register.File, + swizzle, + &index, + &index2D, + chan); if (reg->Register.Absolute) { if (src_datatype == TGSI_EXEC_DATA_FLOAT) { @@ -1243,8 +1239,9 @@ store_dest(struct tgsi_exec_machine *mach, int offset = 0; /* indirection offset */ int index; - if (dst_datatype == TGSI_EXEC_DATA_FLOAT) { - CHECK_INF_OR_NAN(chan); + /* for debugging */ + if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { + check_inf_or_nan(chan); } /* There is an extra source register that indirectly subscripts @@ -1272,12 +1269,12 @@ store_dest(struct tgsi_exec_machine *mach, swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); /* fetch values from the address/indirection register */ - fetch_src_file_channel( - mach, - reg->Indirect.File, - swizzle, - &index, - &indir_index ); + fetch_src_file_channel(mach, + reg->Indirect.File, + swizzle, + &index, + &ZeroVec, + &indir_index); /* save indirection offset */ offset = indir_index.i[0]; @@ -1502,7 +1499,7 @@ emit_primitive(struct tgsi_exec_machine *mach) } /* - * Fetch a four texture samples using STR texture coordinates. + * Fetch four texture samples using STR texture coordinates. 
*/ static void fetch_texel( struct tgsi_sampler *sampler, @@ -1764,13 +1761,7 @@ exec_declaration(struct tgsi_exec_machine *mach, last = decl->Range.Last; mask = decl->Declaration.UsageMask; - if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { - assert(decl->Semantic.Index == 0); - assert(first == last); - assert(mask == TGSI_WRITEMASK_XYZW); - - mach->Inputs[first] = mach->QuadPos; - } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { + if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { uint i; assert(decl->Semantic.Index == 0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 59e3b445cc3..a22873e4c2b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -260,7 +260,7 @@ struct tgsi_exec_machine struct tgsi_sampler **Samplers; unsigned ImmLimit; - const float (*Consts)[4]; + const void *Consts[PIPE_MAX_CONSTANT_BUFFERS]; const struct tgsi_token *Tokens; /**< Declarations, instructions */ unsigned Processor; /**< TGSI_PROCESSOR_x */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 8c7062d850c..7e19e1fe36f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -109,6 +109,10 @@ tgsi_parse_token( next_token( ctx, &decl->Range ); + if (decl->Declaration.Dimension) { + next_token(ctx, &decl->Dim); + } + if( decl->Declaration.Semantic ) { next_token( ctx, &decl->Semantic ); } @@ -280,3 +284,14 @@ tgsi_dup_tokens(const struct tgsi_token *tokens) memcpy(new_tokens, tokens, bytes); return new_tokens; } + + +/** + * Allocate memory for num_tokens tokens. 
+ */ +struct tgsi_token * +tgsi_alloc_tokens(unsigned num_tokens) +{ + unsigned bytes = num_tokens * sizeof(struct tgsi_token); + return (struct tgsi_token *) MALLOC(bytes); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 439a57269b7..b45ccee2f63 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -58,6 +58,7 @@ struct tgsi_full_declaration { struct tgsi_declaration Declaration; struct tgsi_declaration_range Range; + struct tgsi_declaration_dimension Dim; struct tgsi_declaration_semantic Semantic; }; @@ -129,6 +130,10 @@ tgsi_num_tokens(const struct tgsi_token *tokens); struct tgsi_token * tgsi_dup_tokens(const struct tgsi_token *tokens); +struct tgsi_token * +tgsi_alloc_tokens(unsigned num_tokens); + + #if defined __cplusplus } #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 138d2d095bb..ad553c71a57 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -51,7 +51,8 @@ * Since it's pretty much impossible to form PPC vector immediates, load * them from memory here: */ -const float ppc_builtin_constants[] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) const float +ppc_builtin_constants[] = { 1.0f, -128.0f, 128.0, 0.0 }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 7f1c8e5dd68..91e1b27da12 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -335,13 +335,9 @@ iter_instruction( fill_scan_register1d(ind_reg, inst->Src[i].Indirect.File, inst->Src[i].Indirect.Index); - if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || - reg->indices[0] != 0) { - report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); - } check_register_usage( ctx, - reg, + ind_reg, "indirect", FALSE ); } @@ -412,12 +408,16 @@ iter_declaration( uint vert; for (vert = 0; 
vert < ctx->implied_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); - fill_scan_register2d(reg, file, vert, i); + fill_scan_register2d(reg, file, i, vert); check_and_declare(ctx, reg); } } else { scan_register *reg = MALLOC(sizeof(scan_register)); - fill_scan_register1d(reg, file, i); + if (decl->Declaration.Dimension) { + fill_scan_register2d(reg, file, i, decl->Dim.Index2D); + } else { + fill_scan_register1d(reg, file, i); + } check_and_declare(ctx, reg); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index a6cc773003a..232fc537c1d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -101,12 +101,10 @@ tgsi_scan_shader(const struct tgsi_token *tokens, src->Register.File == TGSI_FILE_SYSTEM_VALUE) { const int ind = src->Register.Index; if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { - if (src->Register.SwizzleX == TGSI_SWIZZLE_X) { - info->uses_fogcoord = TRUE; - } - else if (src->Register.SwizzleX == TGSI_SWIZZLE_Y) { - info->uses_frontfacing = TRUE; - } + info->uses_fogcoord = TRUE; + } + else if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FACE) { + info->uses_frontfacing = TRUE; } } } @@ -133,6 +131,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; + info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap; info->num_inputs++; } else if (file == TGSI_FILE_OUTPUT) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index dae5376c24a..741aa7d5c42 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -45,6 +45,7 @@ struct tgsi_shader_info ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x 
*/ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; + ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 9fcffeda368..f918151daaa 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -29,7 +29,7 @@ #include "util/u_memory.h" #include "util/u_prim.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" @@ -553,7 +553,7 @@ parse_register_dcl_bracket( report_error( ctx, "Expected literal unsigned integer" ); return FALSE; } - bracket->first = (int) uindex; + bracket->first = uindex; eat_opt_white( &ctx->cur ); @@ -617,10 +617,12 @@ parse_register_dcl( * input primitive. 
so we want to declare just * the index relevant to the semantics which is in * the second bracket */ - if (ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + if (ctx->processor == TGSI_PROCESSOR_GEOMETRY && *file == TGSI_FILE_INPUT) { brackets[0] = brackets[1]; + *num_brackets = 1; + } else { + *num_brackets = 2; } - *num_brackets = 2; } return TRUE; @@ -738,6 +740,13 @@ parse_src_operand( return FALSE; src->Register.File = file; + if (parsed_opt_brackets) { + src->Register.Dimension = 1; + src->Dimension.Indirect = 0; + src->Dimension.Dimension = 0; + src->Dimension.Index = bracket[0].index; + bracket[0] = bracket[1]; + } src->Register.Index = bracket[0].index; if (bracket[0].ind_file != TGSI_FILE_NULL) { src->Register.Indirect = 1; @@ -748,12 +757,6 @@ parse_src_operand( src->Indirect.SwizzleZ = bracket[0].ind_comp; src->Indirect.SwizzleW = bracket[0].ind_comp; } - if (parsed_opt_brackets) { - src->Register.Dimension = 1; - src->Dimension.Indirect = 0; - src->Dimension.Dimension = 0; - src->Dimension.Index = bracket[1].index; - } /* Parse optional swizzle. 
*/ @@ -933,7 +936,8 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] = "NORMAL", "FACE", "EDGEFLAG", - "PRIM_ID" + "PRIM_ID", + "INSTANCEID" }; static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = @@ -968,8 +972,17 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl = tgsi_default_full_declaration(); decl.Declaration.File = file; decl.Declaration.UsageMask = writemask; - decl.Range.First = brackets[0].first; - decl.Range.Last = brackets[0].last; + + if (num_brackets == 1) { + decl.Range.First = brackets[0].first; + decl.Range.Last = brackets[0].last; + } else { + decl.Range.First = brackets[1].first; + decl.Range.Last = brackets[1].last; + + decl.Declaration.Dimension = 1; + decl.Dim.Index2D = brackets[0].first; + } cur = ctx->cur; eat_opt_white( &cur ); @@ -1116,7 +1129,9 @@ static const char *property_names[] = { "GS_INPUT_PRIMITIVE", "GS_OUTPUT_PRIMITIVE", - "GS_MAX_OUTPUT_VERTICES" + "GS_MAX_OUTPUT_VERTICES", + "FS_COORD_ORIGIN", + "FS_COORD_PIXEL_CENTER" }; static const char *primitive_names[] = @@ -1133,6 +1148,19 @@ static const char *primitive_names[] = "POLYGON" }; +static const char *fs_coord_origin_names[] = +{ + "UPPER_LEFT", + "LOWER_LEFT" +}; + +static const char *fs_coord_pixel_center_names[] = +{ + "HALF_INTEGER", + "INTEGER" +}; + + static boolean parse_primitive( const char **pcur, uint *primitive ) { @@ -1150,6 +1178,40 @@ parse_primitive( const char **pcur, uint *primitive ) return FALSE; } +static boolean +parse_fs_coord_origin( const char **pcur, uint *fs_coord_origin ) +{ + uint i; + + for (i = 0; i < sizeof(fs_coord_origin_names) / sizeof(fs_coord_origin_names[0]); i++) { + const char *cur = *pcur; + + if (str_match_no_case( &cur, fs_coord_origin_names[i])) { + *fs_coord_origin = i; + *pcur = cur; + return TRUE; + } + } + return FALSE; +} + +static boolean +parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center ) +{ + uint i; + + for (i = 0; i < sizeof(fs_coord_pixel_center_names) / 
sizeof(fs_coord_pixel_center_names[0]); i++) { + const char *cur = *pcur; + + if (str_match_no_case( &cur, fs_coord_pixel_center_names[i])) { + *fs_coord_pixel_center = i; + *pcur = cur; + return TRUE; + } + } + return FALSE; +} + static boolean parse_property( struct translate_ctx *ctx ) { @@ -1191,6 +1253,18 @@ static boolean parse_property( struct translate_ctx *ctx ) ctx->implied_array_size = u_vertices_per_prim(values[0]); } break; + case TGSI_PROPERTY_FS_COORD_ORIGIN: + if (!parse_fs_coord_origin(&ctx->cur, &values[0] )) { + report_error( ctx, "Unknown coord origin as property: must be UPPER_LEFT or LOWER_LEFT!" ); + return FALSE; + } + break; + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + if (!parse_fs_coord_pixel_center(&ctx->cur, &values[0] )) { + report_error( ctx, "Unknown coord pixel center as property: must be HALF_INTEGER or INTEGER!" ); + return FALSE; + } + break; default: if (!parse_uint(&ctx->cur, &values[0] )) { report_error( ctx, "Expected unsigned integer as property!" 
); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index e64e2b731df..3d0455de7ce 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -33,6 +33,7 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_sanity.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -40,8 +41,11 @@ union tgsi_any_token { struct tgsi_header header; struct tgsi_processor processor; struct tgsi_token token; + struct tgsi_property prop; + struct tgsi_property_data prop_data; struct tgsi_declaration decl; struct tgsi_declaration_range decl_range; + struct tgsi_declaration_dimension decl_dim; struct tgsi_declaration_semantic decl_semantic; struct tgsi_immediate imm; union tgsi_immediate_data imm_data; @@ -64,6 +68,7 @@ struct ureg_tokens { }; #define UREG_MAX_INPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 32 @@ -72,6 +77,14 @@ struct ureg_tokens { #define UREG_MAX_LOOP 1 #define UREG_MAX_PRED 1 +struct const_decl { + struct { + unsigned first; + unsigned last; + } constant_range[UREG_MAX_CONSTANT_RANGE]; + unsigned nr_constant_ranges; +}; + #define DOMAIN_DECL 0 #define DOMAIN_INSN 1 @@ -84,6 +97,7 @@ struct ureg_program unsigned semantic_name; unsigned semantic_index; unsigned interp; + unsigned cylindrical_wrap; } fs_input[UREG_MAX_INPUT]; unsigned nr_fs_inputs; @@ -91,10 +105,19 @@ struct ureg_program struct { unsigned index; + unsigned semantic_name; + unsigned semantic_index; } gs_input[UREG_MAX_INPUT]; unsigned nr_gs_inputs; struct { + unsigned index; + unsigned semantic_name; + unsigned semantic_index; + } system_value[UREG_MAX_SYSTEM_VALUE]; + unsigned nr_system_values; + + struct { unsigned semantic_name; unsigned semantic_index; } output[UREG_MAX_OUTPUT]; @@ -117,11 +140,14 @@ struct ureg_program unsigned 
temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; - struct { - unsigned first; - unsigned last; - } constant_range[UREG_MAX_CONSTANT_RANGE]; - unsigned nr_constant_ranges; + struct const_decl const_decls; + struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS]; + + unsigned property_gs_input_prim; + unsigned property_gs_output_prim; + unsigned property_gs_max_vertices; + unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */ + unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */ unsigned nr_addrs; unsigned nr_preds; @@ -223,57 +249,72 @@ ureg_dst_register( unsigned file, return dst; } -static INLINE struct ureg_src -ureg_src_register( unsigned file, - unsigned index ) + +void +ureg_property_gs_input_prim(struct ureg_program *ureg, + unsigned input_prim) { - struct ureg_src src; - - src.File = file; - src.SwizzleX = TGSI_SWIZZLE_X; - src.SwizzleY = TGSI_SWIZZLE_Y; - src.SwizzleZ = TGSI_SWIZZLE_Z; - src.SwizzleW = TGSI_SWIZZLE_W; - src.Pad = 0; - src.Indirect = 0; - src.IndirectIndex = 0; - src.IndirectSwizzle = 0; - src.Absolute = 0; - src.Index = index; - src.Negate = 0; - - return src; + ureg->property_gs_input_prim = input_prim; } +void +ureg_property_gs_output_prim(struct ureg_program *ureg, + unsigned output_prim) +{ + ureg->property_gs_output_prim = output_prim; +} +void +ureg_property_gs_max_vertices(struct ureg_program *ureg, + unsigned max_vertices) +{ + ureg->property_gs_max_vertices = max_vertices; +} +void +ureg_property_fs_coord_origin(struct ureg_program *ureg, + unsigned fs_coord_origin) +{ + ureg->property_fs_coord_origin = fs_coord_origin; +} -struct ureg_src -ureg_DECL_fs_input( struct ureg_program *ureg, - unsigned name, - unsigned index, - unsigned interp_mode ) +void +ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, + unsigned fs_coord_pixel_center) +{ + ureg->property_fs_coord_pixel_center = fs_coord_pixel_center; +} + + + +struct ureg_src +ureg_DECL_fs_input_cyl(struct 
ureg_program *ureg, + unsigned semantic_name, + unsigned semantic_index, + unsigned interp_mode, + unsigned cylindrical_wrap) { unsigned i; for (i = 0; i < ureg->nr_fs_inputs; i++) { - if (ureg->fs_input[i].semantic_name == name && - ureg->fs_input[i].semantic_index == index) + if (ureg->fs_input[i].semantic_name == semantic_name && + ureg->fs_input[i].semantic_index == semantic_index) { goto out; + } } if (ureg->nr_fs_inputs < UREG_MAX_INPUT) { - ureg->fs_input[i].semantic_name = name; - ureg->fs_input[i].semantic_index = index; + ureg->fs_input[i].semantic_name = semantic_name; + ureg->fs_input[i].semantic_index = semantic_index; ureg->fs_input[i].interp = interp_mode; + ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap; ureg->nr_fs_inputs++; - } - else { - set_bad( ureg ); + } else { + set_bad(ureg); } out: - return ureg_src_register( TGSI_FILE_INPUT, i ); + return ureg_src_register(TGSI_FILE_INPUT, i); } @@ -290,10 +331,14 @@ ureg_DECL_vs_input( struct ureg_program *ureg, struct ureg_src ureg_DECL_gs_input(struct ureg_program *ureg, - unsigned index) + unsigned index, + unsigned semantic_name, + unsigned semantic_index) { if (ureg->nr_gs_inputs < UREG_MAX_INPUT) { ureg->gs_input[ureg->nr_gs_inputs].index = index; + ureg->gs_input[ureg->nr_gs_inputs].semantic_name = semantic_name; + ureg->gs_input[ureg->nr_gs_inputs].semantic_index = semantic_index; ureg->nr_gs_inputs++; } else { set_bad(ureg); @@ -304,6 +349,25 @@ ureg_DECL_gs_input(struct ureg_program *ureg, } +struct ureg_src +ureg_DECL_system_value(struct ureg_program *ureg, + unsigned index, + unsigned semantic_name, + unsigned semantic_index) +{ + if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { + ureg->system_value[ureg->nr_system_values].index = index; + ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; + ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; + ureg->nr_system_values++; + } else { + set_bad(ureg); + } + + return 
ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); +} + + struct ureg_dst ureg_DECL_output( struct ureg_program *ureg, unsigned name, @@ -334,62 +398,92 @@ out: /* Returns a new constant register. Keep track of which have been * referred to so that we can emit decls later. * + * Constant operands declared with this function must be addressed + * with a two-dimensional index. + * * There is nothing in this code to bind this constant to any tracked * value or manage any constant_buffer contents -- that's the * resposibility of the calling code. */ -struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, - unsigned index ) +void +ureg_DECL_constant2D(struct ureg_program *ureg, + unsigned first, + unsigned last, + unsigned index2D) { + struct const_decl *decl = &ureg->const_decls2D[index2D]; + + assert(index2D < PIPE_MAX_CONSTANT_BUFFERS); + + if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { + uint i = decl->nr_constant_ranges++; + + decl->constant_range[i].first = first; + decl->constant_range[i].last = last; + } +} + + +/* A one-dimensional, depricated version of ureg_DECL_constant2D(). + * + * Constant operands declared with this function must be addressed + * with a one-dimensional index. + */ +struct ureg_src +ureg_DECL_constant(struct ureg_program *ureg, + unsigned index) +{ + struct const_decl *decl = &ureg->const_decls; unsigned minconst = index, maxconst = index; unsigned i; /* Inside existing range? */ - for (i = 0; i < ureg->nr_constant_ranges; i++) { - if (ureg->constant_range[i].first <= index && - ureg->constant_range[i].last >= index) + for (i = 0; i < decl->nr_constant_ranges; i++) { + if (decl->constant_range[i].first <= index && + decl->constant_range[i].last >= index) { goto out; + } } /* Extend existing range? 
*/ - for (i = 0; i < ureg->nr_constant_ranges; i++) { - if (ureg->constant_range[i].last == index - 1) { - ureg->constant_range[i].last = index; + for (i = 0; i < decl->nr_constant_ranges; i++) { + if (decl->constant_range[i].last == index - 1) { + decl->constant_range[i].last = index; goto out; } - if (ureg->constant_range[i].first == index + 1) { - ureg->constant_range[i].first = index; + if (decl->constant_range[i].first == index + 1) { + decl->constant_range[i].first = index; goto out; } - minconst = MIN2(minconst, ureg->constant_range[i].first); - maxconst = MAX2(maxconst, ureg->constant_range[i].last); + minconst = MIN2(minconst, decl->constant_range[i].first); + maxconst = MAX2(maxconst, decl->constant_range[i].last); } /* Create new range? */ - if (ureg->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { - i = ureg->nr_constant_ranges++; - ureg->constant_range[i].first = index; - ureg->constant_range[i].last = index; + if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { + i = decl->nr_constant_ranges++; + decl->constant_range[i].first = index; + decl->constant_range[i].last = index; goto out; } /* Collapse all ranges down to one: */ i = 0; - ureg->constant_range[0].first = minconst; - ureg->constant_range[0].last = maxconst; - ureg->nr_constant_ranges = 1; + decl->constant_range[0].first = minconst; + decl->constant_range[0].last = maxconst; + decl->nr_constant_ranges = 1; out: - assert(i < ureg->nr_constant_ranges); - assert(ureg->constant_range[i].first <= index); - assert(ureg->constant_range[i].last >= index); - return ureg_src_register( TGSI_FILE_CONSTANT, index ); + assert(i < decl->nr_constant_ranges); + assert(decl->constant_range[i].first <= index); + assert(decl->constant_range[i].last >= index); + return ureg_src_register(TGSI_FILE_CONSTANT, index); } @@ -538,7 +632,7 @@ decl_immediate( struct ureg_program *ureg, unsigned type ) { unsigned i, j; - unsigned swizzle; + unsigned swizzle = 0; /* Could do a first pass where we examine all existing 
immediates * without expanding. @@ -616,6 +710,35 @@ ureg_DECL_immediate_uint( struct ureg_program *ureg, struct ureg_src +ureg_DECL_immediate_block_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + uint index; + uint i; + + if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) { + set_bad(ureg); + return ureg_src_register(TGSI_FILE_IMMEDIATE, 0); + } + + index = ureg->nr_immediates; + ureg->nr_immediates += (nr + 3) / 4; + + for (i = index; i < ureg->nr_immediates; i++) { + ureg->immediate[i].type = TGSI_IMM_UINT32; + ureg->immediate[i].nr = nr > 4 ? 4 : nr; + memcpy(ureg->immediate[i].value.u, + &v[(i - index) * 4], + ureg->immediate[i].nr * sizeof(uint)); + nr -= 4; + } + + return ureg_src_register(TGSI_FILE_IMMEDIATE, index); +} + + +struct ureg_src ureg_DECL_immediate_int( struct ureg_program *ureg, const int *v, unsigned nr ) @@ -628,7 +751,7 @@ void ureg_emit_src( struct ureg_program *ureg, struct ureg_src src ) { - unsigned size = 1 + (src.Indirect ? 1 : 0); + unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 
1 : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; @@ -651,7 +774,7 @@ ureg_emit_src( struct ureg_program *ureg, if (src.Indirect) { out[0].src.Indirect = 1; out[n].value = 0; - out[n].src.File = TGSI_FILE_ADDRESS; + out[n].src.File = src.IndirectFile; out[n].src.SwizzleX = src.IndirectSwizzle; out[n].src.SwizzleY = src.IndirectSwizzle; out[n].src.SwizzleZ = src.IndirectSwizzle; @@ -660,6 +783,15 @@ ureg_emit_src( struct ureg_program *ureg, n++; } + if (src.Dimension) { + out[0].src.Dimension = 1; + out[n].dim.Indirect = 0; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + out[n].dim.Index = src.DimensionIndex; + n++; + } + assert(n == size); } @@ -959,32 +1091,59 @@ ureg_label_insn(struct ureg_program *ureg, } - -static void emit_decl( struct ureg_program *ureg, - unsigned file, - unsigned index, - unsigned semantic_name, - unsigned semantic_index, - unsigned interp ) +static void +emit_decl_semantic(struct ureg_program *ureg, + unsigned file, + unsigned index, + unsigned semantic_name, + unsigned semantic_index) { - union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); out[0].value = 0; out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 3; out[0].decl.File = file; out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! 
*/ - out[0].decl.Interpolate = interp; out[0].decl.Semantic = 1; out[1].value = 0; - out[1].decl_range.First = - out[1].decl_range.Last = index; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; out[2].value = 0; out[2].decl_semantic.Name = semantic_name; out[2].decl_semantic.Index = semantic_index; +} + + +static void +emit_decl_fs(struct ureg_program *ureg, + unsigned file, + unsigned index, + unsigned semantic_name, + unsigned semantic_index, + unsigned interpolate, + unsigned cylindrical_wrap) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = file; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ + out[0].decl.Interpolate = interpolate; + out[0].decl.Semantic = 1; + out[0].decl.CylindricalWrap = cylindrical_wrap; + + out[1].value = 0; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; + out[2].value = 0; + out[2].decl_semantic.Name = semantic_name; + out[2].decl_semantic.Index = semantic_index; } @@ -1009,6 +1168,31 @@ static void emit_decl_range( struct ureg_program *ureg, } static void +emit_decl_range2D(struct ureg_program *ureg, + unsigned file, + unsigned first, + unsigned last, + unsigned index2D) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = file; + out[0].decl.UsageMask = 0xf; + out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; + out[0].decl.Dimension = 1; + + out[1].value = 0; + out[1].decl_range.First = first; + out[1].decl_range.Last = last; + + out[2].value = 0; + out[2].decl_dim.Index2D = index2D; +} + +static void emit_immediate( struct ureg_program *ureg, const unsigned *v, unsigned type ) @@ -1027,13 +1211,66 @@ emit_immediate( struct ureg_program *ureg, out[4].imm_data.Uint = v[3]; } +static void +emit_property(struct 
ureg_program *ureg, + unsigned name, + unsigned data) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); + out[0].value = 0; + out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY; + out[0].prop.NrTokens = 2; + out[0].prop.PropertyName = name; + + out[1].prop_data.Data = data; +} static void emit_decls( struct ureg_program *ureg ) { unsigned i; + if (ureg->property_gs_input_prim != ~0) { + assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); + + emit_property(ureg, + TGSI_PROPERTY_GS_INPUT_PRIM, + ureg->property_gs_input_prim); + } + + if (ureg->property_gs_output_prim != ~0) { + assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); + + emit_property(ureg, + TGSI_PROPERTY_GS_OUTPUT_PRIM, + ureg->property_gs_output_prim); + } + + if (ureg->property_gs_max_vertices != ~0) { + assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); + + emit_property(ureg, + TGSI_PROPERTY_GS_MAX_VERTICES, + ureg->property_gs_max_vertices); + } + + if (ureg->property_fs_coord_origin) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); + + emit_property(ureg, + TGSI_PROPERTY_FS_COORD_ORIGIN, + ureg->property_fs_coord_origin); + } + + if (ureg->property_fs_coord_pixel_center) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); + + emit_property(ureg, + TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, + ureg->property_fs_coord_pixel_center); + } + if (ureg->processor == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < UREG_MAX_INPUT; i++) { if (ureg->vs_inputs[i/32] & (1 << (i%32))) { @@ -1042,29 +1279,38 @@ static void emit_decls( struct ureg_program *ureg ) } } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { for (i = 0; i < ureg->nr_fs_inputs; i++) { - emit_decl( ureg, - TGSI_FILE_INPUT, - i, - ureg->fs_input[i].semantic_name, - ureg->fs_input[i].semantic_index, - ureg->fs_input[i].interp ); + emit_decl_fs(ureg, + TGSI_FILE_INPUT, + i, + ureg->fs_input[i].semantic_name, + ureg->fs_input[i].semantic_index, + ureg->fs_input[i].interp, + ureg->fs_input[i].cylindrical_wrap); } } else { for (i = 
0; i < ureg->nr_gs_inputs; i++) { - emit_decl_range(ureg, - TGSI_FILE_INPUT, - ureg->gs_input[i].index, - 1); + emit_decl_semantic(ureg, + TGSI_FILE_INPUT, + ureg->gs_input[i].index, + ureg->gs_input[i].semantic_name, + ureg->gs_input[i].semantic_index); } } + for (i = 0; i < ureg->nr_system_values; i++) { + emit_decl_semantic(ureg, + TGSI_FILE_SYSTEM_VALUE, + ureg->system_value[i].index, + ureg->system_value[i].semantic_name, + ureg->system_value[i].semantic_index); + } + for (i = 0; i < ureg->nr_outputs; i++) { - emit_decl( ureg, - TGSI_FILE_OUTPUT, - i, - ureg->output[i].semantic_name, - ureg->output[i].semantic_index, - TGSI_INTERPOLATE_CONSTANT ); + emit_decl_semantic(ureg, + TGSI_FILE_OUTPUT, + i, + ureg->output[i].semantic_name, + ureg->output[i].semantic_index); } for (i = 0; i < ureg->nr_samplers; i++) { @@ -1073,13 +1319,29 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler[i].Index, 1 ); } - if (ureg->nr_constant_ranges) { - for (i = 0; i < ureg->nr_constant_ranges; i++) - emit_decl_range( ureg, - TGSI_FILE_CONSTANT, - ureg->constant_range[i].first, - (ureg->constant_range[i].last + 1 - - ureg->constant_range[i].first) ); + if (ureg->const_decls.nr_constant_ranges) { + for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { + emit_decl_range(ureg, + TGSI_FILE_CONSTANT, + ureg->const_decls.constant_range[i].first, + ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1); + } + } + + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + struct const_decl *decl = &ureg->const_decls2D[i]; + + if (decl->nr_constant_ranges) { + uint j; + + for (j = 0; j < decl->nr_constant_ranges; j++) { + emit_decl_range2D(ureg, + TGSI_FILE_CONSTANT, + decl->constant_range[j].first, + decl->constant_range[j].last, + i); + } + } } if (ureg->nr_temps) { @@ -1234,6 +1496,9 @@ struct ureg_program *ureg_create( unsigned processor ) return NULL; ureg->processor = processor; + ureg->property_gs_input_prim = ~0; + 
ureg->property_gs_output_prim = ~0; + ureg->property_gs_max_vertices = ~0; return ureg; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 6f11273320a..0130a77aadb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -30,6 +30,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" +#include "util/u_debug.h" #ifdef __cplusplus extern "C" { @@ -47,13 +48,15 @@ struct ureg_src unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ - unsigned Pad : 1; /* BOOL */ unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ unsigned Absolute : 1; /* BOOL */ - int Index : 16; /* SINT */ unsigned Negate : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned IndirectFile : 4; /* TGSI_FILE_ */ int IndirectIndex : 16; /* SINT */ - int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + int DimensionIndex : 16; /* SINT */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -118,16 +121,53 @@ ureg_create_shader_and_destroy( struct ureg_program *p, } +/*********************************************************************** + * Build shader properties: + */ + +void +ureg_property_gs_input_prim(struct ureg_program *ureg, + unsigned input_prim); + +void +ureg_property_gs_output_prim(struct ureg_program *ureg, + unsigned output_prim); + +void +ureg_property_gs_max_vertices(struct ureg_program *ureg, + unsigned max_vertices); + +void +ureg_property_fs_coord_origin(struct ureg_program *ureg, + unsigned fs_coord_origin); + +void +ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, + unsigned fs_coord_pixel_center); /*********************************************************************** * Build shader declarations: */ struct ureg_src -ureg_DECL_fs_input( struct ureg_program *, - unsigned semantic_name, - unsigned 
semantic_index, - unsigned interp_mode ); +ureg_DECL_fs_input_cyl(struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index, + unsigned interp_mode, + unsigned cylindrical_wrap); + +static INLINE struct ureg_src +ureg_DECL_fs_input(struct ureg_program *ureg, + unsigned semantic_name, + unsigned semantic_index, + unsigned interp_mode) +{ + return ureg_DECL_fs_input_cyl(ureg, + semantic_name, + semantic_index, + interp_mode, + 0); +} struct ureg_src ureg_DECL_vs_input( struct ureg_program *, @@ -135,7 +175,15 @@ ureg_DECL_vs_input( struct ureg_program *, struct ureg_src ureg_DECL_gs_input(struct ureg_program *, - unsigned index); + unsigned index, + unsigned semantic_name, + unsigned semantic_index); + +struct ureg_src +ureg_DECL_system_value(struct ureg_program *, + unsigned index, + unsigned semantic_name, + unsigned semantic_index); struct ureg_dst ureg_DECL_output( struct ureg_program *, @@ -153,10 +201,21 @@ ureg_DECL_immediate_uint( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_block_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src ureg_DECL_immediate_int( struct ureg_program *, const int *v, unsigned nr ); +void +ureg_DECL_constant2D(struct ureg_program *ureg, + unsigned first, + unsigned last, + unsigned index2D); + struct ureg_src ureg_DECL_constant( struct ureg_program *, unsigned index ); @@ -753,18 +812,30 @@ static INLINE struct ureg_src ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) { assert(reg.File != TGSI_FILE_NULL); - assert(addr.File == TGSI_FILE_ADDRESS); + assert(addr.File == TGSI_FILE_ADDRESS || addr.File == TGSI_FILE_TEMPORARY); reg.Indirect = 1; + reg.IndirectFile = addr.File; reg.IndirectIndex = addr.Index; reg.IndirectSwizzle = addr.SwizzleX; return reg; } +static INLINE struct ureg_src +ureg_src_dimension( struct ureg_src reg, int index ) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Dimension = 1; + reg.DimensionIndex = index; + return reg; 
+} + static INLINE struct ureg_dst ureg_dst( struct ureg_src src ) { struct ureg_dst dst; + assert(!src.Indirect || src.IndirectFile == TGSI_FILE_ADDRESS); + dst.File = src.File; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = src.Indirect; @@ -783,6 +854,30 @@ ureg_dst( struct ureg_src src ) } static INLINE struct ureg_src +ureg_src_register(unsigned file, + unsigned index) +{ + struct ureg_src src; + + src.File = file; + src.SwizzleX = TGSI_SWIZZLE_X; + src.SwizzleY = TGSI_SWIZZLE_Y; + src.SwizzleZ = TGSI_SWIZZLE_Z; + src.SwizzleW = TGSI_SWIZZLE_W; + src.Indirect = 0; + src.IndirectFile = TGSI_FILE_NULL; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; + src.Absolute = 0; + src.Index = index; + src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; + + return src; +} + +static INLINE struct ureg_src ureg_src( struct ureg_dst dst ) { struct ureg_src src; @@ -792,13 +887,15 @@ ureg_src( struct ureg_dst dst ) src.SwizzleY = TGSI_SWIZZLE_Y; src.SwizzleZ = TGSI_SWIZZLE_Z; src.SwizzleW = TGSI_SWIZZLE_W; - src.Pad = 0; src.Indirect = dst.Indirect; + src.IndirectFile = TGSI_FILE_ADDRESS; src.IndirectIndex = dst.IndirectIndex; src.IndirectSwizzle = dst.IndirectSwizzle; src.Absolute = 0; src.Index = dst.Index; src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; return src; } @@ -837,13 +934,15 @@ ureg_src_undef( void ) src.SwizzleY = 0; src.SwizzleZ = 0; src.SwizzleW = 0; - src.Pad = 0; src.Indirect = 0; + src.IndirectFile = TGSI_FILE_NULL; src.IndirectIndex = 0; src.IndirectSwizzle = 0; src.Absolute = 0; src.Index = 0; src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; return src; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index f4ca9e21ed9..0a7e4105a80 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -28,7 +28,6 @@ #include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" -#include "tgsi_build.h" #include 
"tgsi_util.h" union pointer_hack diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 34526eb0617..54ed2c1a4be 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -44,12 +44,19 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" +enum translate_element_type { + TRANSLATE_ELEMENT_NORMAL, + TRANSLATE_ELEMENT_INSTANCE_ID +}; + struct translate_element { + enum translate_element_type type; enum pipe_format input_format; enum pipe_format output_format; unsigned input_buffer:8; unsigned input_offset:24; + unsigned instance_divisor; unsigned output_offset; }; @@ -74,11 +81,13 @@ struct translate { void (PIPE_CDECL *run_elts)( struct translate *, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer); void (PIPE_CDECL *run)( struct translate *, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer); }; @@ -103,8 +112,13 @@ static INLINE int translate_keysize( const struct translate_key *key ) static INLINE int translate_key_compare( const struct translate_key *a, const struct translate_key *b ) { - int keysize = translate_keysize(a); - return memcmp(a, b, keysize); + int keysize_a = translate_keysize(a); + int keysize_b = translate_keysize(b); + + if (keysize_a != keysize_b) { + return keysize_a - keysize_b; + } + return memcmp(a, b, keysize_a); } diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 266e7ee81e6..24727d49888 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -46,9 +46,12 @@ struct translate_generic { struct translate translate; struct { + enum translate_element_type type; + fetch_func fetch; unsigned buffer; unsigned input_offset; + unsigned instance_divisor; emit_func emit; unsigned output_offset; @@ -568,6 +571,7 @@ static emit_func 
get_emit_func( enum pipe_format format ) static void PIPE_CDECL generic_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -583,13 +587,20 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); + const char *src; char *dst = (vert + tg->attrib[attr].output_offset); + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + tg->attrib[attr].fetch( src, data ); if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", @@ -607,6 +618,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, static void PIPE_CDECL generic_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -623,13 +635,25 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); - char *dst = (vert + tg->attrib[attr].output_offset); - tg->attrib[attr].fetch( src, data ); + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const char *src; + + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + + tg->attrib[attr].fetch( src, data ); + } else { + data[0] = (float)instance_id; + } if (0) 
debug_printf("vert %d attr %d: %f %f %f %f\n", i, attr, data[0], data[1], data[2], data[3]); @@ -683,10 +707,12 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->translate.run = generic_run; for (i = 0; i < key->nr_elements; i++) { + tg->attrib[i].type = key->element[i].type; tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); tg->attrib[i].buffer = key->element[i].input_buffer; tg->attrib[i].input_offset = key->element[i].input_offset; + tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); tg->attrib[i].output_offset = key->element[i].output_offset; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index b62db8d8f33..c13e7427387 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -49,19 +49,29 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, unsigned start, unsigned count, - void *output_buffer ); + unsigned instance_id, + void *output_buffer); typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, const unsigned *elts, unsigned count, - void *output_buffer ); + unsigned instance_id, + void *output_buffer); struct translate_buffer { const void *base_ptr; unsigned stride; - void *ptr; /* updated per vertex */ }; +struct translate_buffer_varient { + unsigned buffer_index; + unsigned instance_divisor; + void *ptr; /* updated either per vertex or per instance */ +}; + + +#define ELEMENT_BUFFER_INSTANCE_ID 1001 + struct translate_sse { struct translate translate; @@ -81,6 +91,16 @@ struct translate_sse { struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; unsigned nr_buffers; + /* Multiple buffer varients can map to a single buffer. 
*/ + struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS]; + unsigned nr_buffer_varients; + + /* Multiple elements can map to a single buffer varient. */ + unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS]; + + boolean use_instancing; + unsigned instance_id; + run_func gen_run; run_elts_func gen_run_elts; @@ -359,32 +379,61 @@ static boolean init_inputs( struct translate_sse *p, boolean linear ) { unsigned i; - if (linear) { - for (i = 0; i < p->nr_buffers; i++) { + struct x86_reg instance_id = x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); + + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; + struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; + + if (linear || varient->instance_divisor) { struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &buffer->stride)); struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].base_ptr)); + get_offset(p, &buffer->base_ptr)); struct x86_reg elt = p->idx_EBX; - struct x86_reg tmp = p->tmp_EAX; - + struct x86_reg tmp_EAX = p->tmp_EAX; /* Calculate pointer to first attrib: + * base_ptr + stride * index, where index depends on instance divisor */ - x86_mov(p->func, tmp, buf_stride); - x86_imul(p->func, tmp, elt); - x86_add(p->func, tmp, buf_base_ptr); + if (varient->instance_divisor) { + /* Our index is instance ID divided by instance divisor. + */ + x86_mov(p->func, tmp_EAX, instance_id); + + if (varient->instance_divisor != 1) { + struct x86_reg tmp_EDX = p->machine_EDX; + struct x86_reg tmp_ECX = p->outbuf_ECX; + + /* TODO: Add x86_shr() to rtasm and use it whenever + * instance divisor is power of two. 
+ */ + + x86_push(p->func, tmp_EDX); + x86_push(p->func, tmp_ECX); + x86_xor(p->func, tmp_EDX, tmp_EDX); + x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor); + x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ + x86_pop(p->func, tmp_ECX); + x86_pop(p->func, tmp_EDX); + } + } else { + x86_mov(p->func, tmp_EAX, elt); + } + x86_imul(p->func, tmp_EAX, buf_stride); + x86_add(p->func, tmp_EAX, buf_base_ptr); /* In the linear case, keep the buffer pointer instead of the * index number. */ - if (p->nr_buffers == 1) - x86_mov( p->func, elt, tmp ); + if (linear && p->nr_buffer_varients == 1) + x86_mov(p->func, elt, tmp_EAX); else - x86_mov( p->func, buf_ptr, tmp ); + x86_mov(p->func, buf_ptr, tmp_EAX); } } @@ -394,31 +443,36 @@ static boolean init_inputs( struct translate_sse *p, static struct x86_reg get_buffer_ptr( struct translate_sse *p, boolean linear, - unsigned buf_idx, + unsigned var_idx, struct x86_reg elt ) { - if (linear && p->nr_buffers == 1) { + if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { + return x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); + } + if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; } - else if (linear) { + else if (linear || p->buffer_varient[var_idx].instance_divisor) { struct x86_reg ptr = p->tmp_EAX; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].ptr)); + get_offset(p, &p->buffer_varient[var_idx].ptr)); x86_mov(p->func, ptr, buf_ptr); return ptr; } else { struct x86_reg ptr = p->tmp_EAX; + const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx]; struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].base_ptr)); + get_offset(p, &p->buffer[varient->buffer_index].base_ptr)); @@ -436,28 +490,33 @@ static struct x86_reg get_buffer_ptr( 
struct translate_sse *p, static boolean incr_inputs( struct translate_sse *p, boolean linear ) { - if (linear && p->nr_buffers == 1) { + if (linear && p->nr_buffer_varients == 1) { struct x86_reg stride = x86_make_disp(p->machine_EDX, get_offset(p, &p->buffer[0].stride)); - x86_add(p->func, p->idx_EBX, stride); - sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + if (p->buffer_varient[0].instance_divisor == 0) { + x86_add(p->func, p->idx_EBX, stride); + sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + } } else if (linear) { unsigned i; /* Is this worthwhile?? */ - for (i = 0; i < p->nr_buffers; i++) { + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); - x86_mov(p->func, p->tmp_EAX, buf_ptr); - x86_add(p->func, p->tmp_EAX, buf_stride); - if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); - x86_mov(p->func, buf_ptr, p->tmp_EAX); + if (varient->instance_divisor == 0) { + x86_mov(p->func, p->tmp_EAX, buf_ptr); + x86_add(p->func, p->tmp_EAX, buf_stride); + if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); + x86_mov(p->func, buf_ptr, p->tmp_EAX); + } } } else { @@ -514,7 +573,18 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1)); x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2)); x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3)); - x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 4)); + x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5)); + + /* Load instance ID. 
+ */ + if (p->use_instancing) { + x86_mov(p->func, + p->tmp_EAX, + x86_fn_arg(p->func, 4)); + x86_mov(p->func, + x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), + p->tmp_EAX); + } /* Get vertex count, compare to zero */ @@ -531,17 +601,18 @@ static boolean build_vertex_emit( struct translate_sse *p, label = x86_get_label(p->func); { struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX); - int last_vb = -1; + int last_varient = -1; struct x86_reg vb; for (j = 0; j < p->translate.key.nr_elements; j++) { const struct translate_element *a = &p->translate.key.element[j]; + unsigned varient = p->element_to_buffer_varient[j]; /* Figure out source pointer address: */ - if (a->input_buffer != last_vb) { - last_vb = a->input_buffer; - vb = get_buffer_ptr(p, linear, a->input_buffer, elt); + if (varient != last_varient) { + last_varient = varient; + vb = get_buffer_ptr(p, linear, varient, elt); } if (!translate_attr( p, a, @@ -624,6 +695,7 @@ static void translate_sse_release( struct translate *translate ) static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; @@ -631,12 +703,14 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, p->gen_run_elts( translate, elts, count, - output_buffer ); + instance_id, + output_buffer); } static void PIPE_CDECL translate_sse_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; @@ -644,7 +718,8 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, p->gen_run( translate, start, count, - output_buffer ); + instance_id, + output_buffer); } @@ -666,8 +741,37 @@ struct translate *translate_sse2_create( const struct translate_key *key ) p->translate.run_elts = translate_sse_run_elts; 
p->translate.run = translate_sse_run; - for (i = 0; i < key->nr_elements; i++) - p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 ); + for (i = 0; i < key->nr_elements; i++) { + if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) { + unsigned j; + + p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); + + if (key->element[i].instance_divisor) { + p->use_instancing = TRUE; + } + + /* + * Map vertex element to vertex buffer varient. + */ + for (j = 0; j < p->nr_buffer_varients; j++) { + if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && + p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { + break; + } + } + if (j == p->nr_buffer_varients) { + p->buffer_varient[j].buffer_index = key->element[i].input_buffer; + p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; + p->nr_buffer_varients++; + } + p->element_to_buffer_varient[i] = j; + } else { + assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); + + p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; + } + } if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); diff --git a/src/gallium/auxiliary/util/u_atomic.h b/src/gallium/auxiliary/util/u_atomic.h new file mode 100644 index 00000000000..3c42477ad4f --- /dev/null +++ b/src/gallium/auxiliary/util/u_atomic.h @@ -0,0 +1,305 @@ +/** + * Many similar implementations exist. See for example libwsbm + * or the linux kernel include/atomic.h + * + * No copyright claimed on this file. + * + */ + +#ifndef U_ATOMIC_H +#define U_ATOMIC_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" + +/* Favor OS-provided implementations. + * + * Where no OS-provided implementation is available, fall back to + * locally coded assembly, compiler intrinsic or ultimately a + * mutex-based implementation. 
+ */ +#if (defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || \ + defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)) +#define PIPE_ATOMIC_OS_UNLOCKED +#elif defined(PIPE_OS_SOLARIS) +#define PIPE_ATOMIC_OS_SOLARIS +#elif defined(PIPE_CC_MSVC) +#define PIPE_ATOMIC_MSVC_INTRINSIC +#elif (defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)) +#define PIPE_ATOMIC_ASM_MSVC_X86 +#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)) +#define PIPE_ATOMIC_ASM_GCC_X86 +#elif defined(PIPE_CC_GCC) +#define PIPE_ATOMIC_GCC_INTRINSIC +#else +#error "Unsupported platform" +#endif + + + +#if defined(PIPE_ATOMIC_ASM_GCC_X86) + +#define PIPE_ATOMIC "GCC x86 assembly" + +#ifdef __cplusplus +extern "C" { +#endif + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) + +static INLINE boolean +p_atomic_dec_zero(int32_t *v) +{ + unsigned char c; + + __asm__ __volatile__("lock; decl %0; sete %1":"+m"(*v), "=qm"(c) + ::"memory"); + + return c != 0; +} + +static INLINE void +p_atomic_inc(int32_t *v) +{ + __asm__ __volatile__("lock; incl %0":"+m"(*v)); +} + +static INLINE void +p_atomic_dec(int32_t *v) +{ + __asm__ __volatile__("lock; decl %0":"+m"(*v)); +} + +static INLINE int32_t +p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new) +{ + return __sync_val_compare_and_swap(v, old, _new); +} + +#ifdef __cplusplus +} +#endif + +#endif + + + +/* Implementation using GCC-provided synchronization intrinsics + */ +#if defined(PIPE_ATOMIC_GCC_INTRINSIC) + +#define PIPE_ATOMIC "GCC Sync Intrinsics" + +#ifdef __cplusplus +extern "C" { +#endif + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) + +static INLINE boolean +p_atomic_dec_zero(int32_t *v) +{ + return (__sync_sub_and_fetch(v, 1) == 0); +} + +static INLINE void +p_atomic_inc(int32_t *v) +{ + (void) __sync_add_and_fetch(v, 1); +} + +static INLINE void +p_atomic_dec(int32_t *v) +{ + (void) __sync_sub_and_fetch(v, 1); +} + +static INLINE int32_t +p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new) +{ + 
return __sync_val_compare_and_swap(v, old, _new); +} + +#ifdef __cplusplus +} +#endif + +#endif + + + +/* Unlocked version for single threaded environments, such as some + * windows kernel modules. + */ +#if defined(PIPE_ATOMIC_OS_UNLOCKED) + +#define PIPE_ATOMIC "Unlocked" + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) +#define p_atomic_dec_zero(_v) ((boolean) (--(*(_v)) == 0)) /* TRUE once the count reaches zero, like the locked variants */ +#define p_atomic_inc(_v) ((void) (*(_v))++) +#define p_atomic_dec(_v) ((void) (*(_v))--) +#define p_atomic_cmpxchg(_v, old, _new) (*(_v) == (old) ? (*(_v) = (_new), (old)) : *(_v)) /* always yields the previous value */ + +#endif + + +/* Locally coded assembly for MSVC on x86: + */ +#if defined(PIPE_ATOMIC_ASM_MSVC_X86) + +#define PIPE_ATOMIC "MSVC x86 assembly" + +#ifdef __cplusplus +extern "C" { +#endif + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) + +static INLINE boolean +p_atomic_dec_zero(int32_t *v) +{ + unsigned char c; + + __asm { + mov eax, [v] + lock dec dword ptr [eax] + sete byte ptr [c] + } + + return c != 0; +} + +static INLINE void +p_atomic_inc(int32_t *v) +{ + __asm { + mov eax, [v] + lock inc dword ptr [eax] + } +} + +static INLINE void +p_atomic_dec(int32_t *v) +{ + __asm { + mov eax, [v] + lock dec dword ptr [eax] + } +} + +static INLINE int32_t +p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new) +{ + int32_t orig; + + __asm { + mov ecx, [v] + mov eax, [old] + mov edx, [_new] + lock cmpxchg [ecx], edx + mov [orig], eax + } + + return orig; +} + +#ifdef __cplusplus +} +#endif + +#endif + + +#if defined(PIPE_ATOMIC_MSVC_INTRINSIC) + +#define PIPE_ATOMIC "MSVC Intrinsics" + +#include <intrin.h> + +#pragma intrinsic(_InterlockedIncrement) +#pragma intrinsic(_InterlockedDecrement) +#pragma intrinsic(_InterlockedCompareExchange) + +#ifdef __cplusplus +extern "C" { +#endif + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) + +static INLINE boolean +p_atomic_dec_zero(int32_t *v) +{ + return _InterlockedDecrement((long *)v) == 0; 
+} + +static INLINE void +p_atomic_inc(int32_t *v) +{ + _InterlockedIncrement((long *)v); +} + +static INLINE void +p_atomic_dec(int32_t *v) +{ + _InterlockedDecrement((long *)v); +} + +static INLINE int32_t +p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new) +{ + return _InterlockedCompareExchange((long *)v, _new, old); +} + +#ifdef __cplusplus +} +#endif + +#endif + +#if defined(PIPE_ATOMIC_OS_SOLARIS) + +#define PIPE_ATOMIC "Solaris OS atomic functions" + +#include <atomic.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define p_atomic_set(_v, _i) (*(_v) = (_i)) +#define p_atomic_read(_v) (*(_v)) + +static INLINE boolean +p_atomic_dec_zero(int32_t *v) +{ + uint32_t n = atomic_dec_32_nv((uint32_t *) v); + + return n == 0; /* n is the post-decrement value; TRUE only when it reached zero */ +} + +#define p_atomic_inc(_v) atomic_inc_32((uint32_t *) _v) +#define p_atomic_dec(_v) atomic_dec_32((uint32_t *) _v) + +#define p_atomic_cmpxchg(_v, _old, _new) \ + atomic_cas_32( (uint32_t *) _v, (uint32_t) _old, (uint32_t) _new) + +#ifdef __cplusplus +} +#endif + +#endif + + +#ifndef PIPE_ATOMIC +#error "No pipe_atomic implementation selected" +#endif + + + +#endif /* U_ATOMIC_H */ diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 3f74e2aa8b8..f0bc58a558f 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -36,7 +36,7 @@ #include "pipe/p_context.h" #include "util/u_debug.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_state.h" @@ -92,7 +92,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) /* disabled blending/masking */ memset(&ctx->blend, 0, sizeof(ctx->blend)); - ctx->blend.colormask = PIPE_MASK_RGBA; + ctx->blend.rt[0].colormask = PIPE_MASK_RGBA; /* no-op depth/stencil/alpha */ memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil)); @@ -226,8 +226,8 @@ setup_vertex_data_tex(struct blit_state *ctx, offset = get_next_slot( ctx ); - 
pipe_buffer_write(ctx->pipe->screen, ctx->vbuf, - offset, sizeof(ctx->vertices), ctx->vertices); + pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf, + offset, sizeof(ctx->vertices), ctx->vertices); return offset; } @@ -262,6 +262,10 @@ regions_overlap(int srcX0, int srcY0, * Copy pixel block from src surface to dst surface. * Overlapping regions are acceptable. * Flipping and stretching are supported. + * \param filter one of PIPE_TEX_MIPFILTER_NEAREST/LINEAR + * \param writemask controls which channels in the dest surface are sourced + * from the src surface. Disabled channels are sourced + * from (0,0,0,1). * XXX what about clipping??? * XXX need some control over blitting Z and/or stencil. */ diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 46b4706b768..f3b4491d175 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -34,7 +34,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_state.h" @@ -125,7 +125,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) memset(&blend, 0, sizeof(blend)); ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend); - blend.colormask = PIPE_MASK_RGBA; + blend.rt[0].colormask = PIPE_MASK_RGBA; ctx->blend_write_color = pipe->create_blend_state(pipe, &blend); /* depth stencil alpha state objects */ @@ -379,9 +379,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, float t1 = y1 / (float)surf->height; float s2 = x2 / (float)surf->width; float t2 = y2 / (float)surf->height; - const float st[4][2] = { - {s1, t1}, {s2, t1}, {s2, t2}, {s1, t2} - }; + float st[4][2]; + + st[0][0] = s1; + st[0][1] = t1; + st[1][0] = s2; + st[1][1] = t1; + st[2][0] = s2; + st[2][1] = t2; + st[3][0] = s1; + st[3][1] = t2; util_map_texcoords2d_onto_cubemap(surf->face, /* pointer, stride in 
floats */ diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 9b4e6ca2a73..4821b8a1434 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -29,125 +29,30 @@ #include "pipe/p_config.h" -#include <stdarg.h> - - -#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY - -#include <windows.h> -#include <winddi.h> - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) - -#include <stdio.h> -#include <stdlib.h> -#include <windows.h> -#include <types.h> - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) - -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers -#endif -#include <windows.h> -#include <stdio.h> - -#else - -#include <stdio.h> -#include <stdlib.h> - -#endif - -#include "pipe/p_compiler.h" +#include "pipe/p_compiler.h" +#include "os/os_stream.h" #include "util/u_debug.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_string.h" -#include "util/u_stream.h" #include "util/u_math.h" #include "util/u_tile.h" #include "util/u_prim.h" -#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY -static INLINE void -_EngDebugPrint(const char *format, ...) -{ - va_list ap; - va_start(ap, format); - EngDebugPrint("", (PCHAR)format, ap); - va_end(ap); -} -#endif - - void _debug_vprintf(const char *format, va_list ap) { -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - /* EngDebugPrint does not handle float point arguments, so we need to use - * our own vsnprintf implementation. It is also very slow, so buffer until - * we find a newline. 
*/ - static char buf[512] = {'\0'}; - size_t len = strlen(buf); - int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); - if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) { - _EngDebugPrint("%s", buf); - buf[0] = '\0'; - } -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) - /* OutputDebugStringA can be very slow, so buffer until we find a newline. */ + /* We buffer until we find a newline. */ static char buf[4096] = {'\0'}; size_t len = strlen(buf); int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) { - OutputDebugStringA(buf); + os_log_message(buf); buf[0] = '\0'; } - - if(GetConsoleWindow() && !IsDebuggerPresent()) { - fflush(stdout); - vfprintf(stderr, format, ap); - fflush(stderr); - } - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) - wchar_t *wide_format; - long wide_str_len; - char buf[512]; - int ret; -#if (_WIN32_WCE < 600) - ret = vsprintf(buf, format, ap); - if(ret < 0){ - sprintf(buf, "Cant handle debug print!"); - ret = 25; - } -#else - ret = vsprintf_s(buf, 512, format, ap); - if(ret < 0){ - sprintf_s(buf, 512, "Cant handle debug print!"); - ret = 25; - } -#endif - buf[ret] = '\0'; - /* Format is ascii - needs to be converted to wchar_t for printing */ - wide_str_len = MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, NULL, 0); - wide_format = (wchar_t *) malloc((wide_str_len+1) * sizeof(wchar_t)); - if (wide_format) { - MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, - wide_format, wide_str_len); - NKDbgPrintfW(wide_format, wide_format); - free(wide_format); - } -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - /* TODO */ -#else /* !PIPE_SUBSYSTEM_WINDOWS */ - fflush(stdout); - vfprintf(stderr, format, ap); -#endif } @@ -169,108 +74,12 @@ void debug_print_blob( const char *name, #endif -#ifndef debug_break -void debug_break(void) -{ -#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) - DebugBreak(); -#elif 
defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - EngDebugBreak(); -#else - abort(); -#endif -} -#endif - - -#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY -static const char * -find(const char *start, const char *end, char c) -{ - const char *p; - for(p = start; !end || p != end; ++p) { - if(*p == c) - return p; - if(*p < 32) - break; - } - return NULL; -} - -static int -compare(const char *start, const char *end, const char *s) -{ - const char *p, *q; - for(p = start, q = s; p != end && *q != '\0'; ++p, ++q) { - if(*p != *q) - return 0; - } - return p == end && *q == '\0'; -} - -static void -copy(char *dst, const char *start, const char *end, size_t n) -{ - const char *p; - char *q; - for(p = start, q = dst, n = n - 1; p != end && n; ++p, ++q, --n) - *q = *p; - *q = '\0'; -} -#endif - - -static INLINE const char * -_debug_get_option(const char *name) -{ -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - /* EngMapFile creates the file if it does not exists, so it must either be - * disabled on release versions (or put in a less conspicuous place). 
*/ -#ifdef DEBUG - const char *result = NULL; - ULONG_PTR iFile = 0; - const void *pMap = NULL; - const char *sol, *eol, *sep; - static char output[1024]; - - pMap = EngMapFile(L"\\??\\c:\\gallium.cfg", 0, &iFile); - if(pMap) { - sol = (const char *)pMap; - while(1) { - /* TODO: handle LF line endings */ - eol = find(sol, NULL, '\r'); - if(!eol || eol == sol) - break; - sep = find(sol, eol, '='); - if(!sep) - break; - if(compare(sol, sep, name)) { - copy(output, sep + 1, eol, sizeof(output)); - result = output; - break; - } - sol = eol + 2; - } - EngUnmapFile(iFile); - } - return result; -#else - return NULL; -#endif -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - /* TODO: implement */ - return NULL; -#else - return getenv(name); -#endif -} - const char * debug_get_option(const char *name, const char *dfault) { const char *result; - result = _debug_get_option(name); + result = os_get_option(name); if(!result) result = dfault; @@ -282,7 +91,7 @@ debug_get_option(const char *name, const char *dfault) boolean debug_get_bool_option(const char *name, boolean dfault) { - const char *str = _debug_get_option(name); + const char *str = os_get_option(name); boolean result; if(str == NULL) @@ -312,7 +121,7 @@ debug_get_num_option(const char *name, long dfault) long result; const char *str; - str = _debug_get_option(name); + str = os_get_option(name); if(!str) result = dfault; else { @@ -348,7 +157,7 @@ debug_get_flags_option(const char *name, unsigned long result; const char *str; - str = _debug_get_option(name); + str = os_get_option(name); if(!str) result = dfault; else if (!util_strcmp(str, "help")) { @@ -389,7 +198,7 @@ void _debug_assert_fail(const char *expr, #else if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", TRUE)) #endif - debug_break(); + os_abort(); else _debug_printf("continuing...\n"); } @@ -631,6 +440,14 @@ const char *u_prim_name( unsigned prim ) #ifdef DEBUG +/** + * Dump an image to a .raw or .ppm file (depends 
on OS). + * \param format PIPE_FORMAT_x + * \param cpp bytes per pixel + * \param width width in pixels + * \param height height in pixels + * \param stride row stride in bytes + */ void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, unsigned width, unsigned height, @@ -672,6 +489,52 @@ void debug_dump_image(const char *prefix, } EngUnmapFile(iFile); +#elif defined(PIPE_OS_UNIX) + /* write a ppm file */ + char filename[256]; + FILE *f; + + util_snprintf(filename, sizeof(filename), "%s.ppm", prefix); + + f = fopen(filename, "w"); + if (f) { + int i, x, y; + int r, g, b; + const uint8_t *ptr = (uint8_t *) data; + + /* XXX this is a hack */ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + r = 2; + g = 1; + b = 0; + break; + default: + r = 0; + g = 1; + b = 1; + } + + fprintf(f, "P6\n"); + fprintf(f, "# ppm-file created by osdemo.c\n"); + fprintf(f, "%i %i\n", width, height); + fprintf(f, "255\n"); + fclose(f); + + f = fopen(filename, "ab"); /* reopen in binary append mode */ + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + i = y * stride + x * cpp; + fputc(ptr[i + r], f); /* write red */ + fputc(ptr[i + g], f); /* write green */ + fputc(ptr[i + b], f); /* write blue */ + } + } + fclose(f); + } + else { + fprintf(stderr, "Can't open %s for writing\n", filename); + } #endif } @@ -712,6 +575,27 @@ error: } +void debug_dump_texture(const char *prefix, + struct pipe_texture *texture) +{ + struct pipe_surface *surface; + struct pipe_screen *screen; + + if (!texture) + return; + + screen = texture->screen; + + /* XXX for now, just dump image for face=0, level=0 */ + surface = screen->get_tex_surface(screen, texture, 0, 0, 0, + PIPE_TEXTURE_USAGE_SAMPLER); + if (surface) { + debug_dump_surface(prefix, surface); + screen->tex_surface_destroy(surface); + } +} + + #pragma pack(push,2) struct bmp_file_header { uint16_t bfType; @@ -797,7 +681,7 @@ debug_dump_float_rgba_bmp(const char *filename, float *rgba, unsigned stride) { #ifndef 
PIPE_SUBSYSTEM_WINDOWS_MINIPORT - struct util_stream *stream; + struct os_stream *stream; struct bmp_file_header bmfh; struct bmp_info_header bmih; unsigned x, y; @@ -823,12 +707,12 @@ debug_dump_float_rgba_bmp(const char *filename, bmih.biClrUsed = 0; bmih.biClrImportant = 0; - stream = util_stream_create(filename, bmfh.bfSize); + stream = os_stream_create(filename, bmfh.bfSize); if(!stream) goto error1; - util_stream_write(stream, &bmfh, 14); - util_stream_write(stream, &bmih, 40); + os_stream_write(stream, &bmfh, 14); + os_stream_write(stream, &bmih, 40); y = height; while(y--) { @@ -840,11 +724,11 @@ debug_dump_float_rgba_bmp(const char *filename, pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]); pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]); pixel.rgbAlpha = 255; - util_stream_write(stream, &pixel, 4); + os_stream_write(stream, &pixel, 4); } } - util_stream_close(stream); + os_stream_close(stream); error1: ; #endif diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index facc30a5534..efcf065d276 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -39,9 +39,7 @@ #define U_DEBUG_H_ -#include <stdarg.h> - -#include "pipe/p_compiler.h" +#include "os/os_misc.h" #ifdef __cplusplus @@ -49,22 +47,6 @@ extern "C" { #endif -#if defined(DBG) || defined(DEBUG) -#ifndef DEBUG -#define DEBUG 1 -#endif -#else -#ifndef NDEBUG -#define NDEBUG 1 -#endif -#endif - - -/* MSVC bebore VC7 does not have the __FUNCTION__ macro */ -#if defined(_MSC_VER) && _MSC_VER < 1300 -#define __FUNCTION__ "???" -#endif - #if defined(__GNUC__) #define _util_printf_format(fmt, list) __attribute__ ((format (printf, fmt, list))) #else @@ -155,13 +137,7 @@ void debug_print_format(const char *msg, unsigned fmt ); * Hard-coded breakpoint. 
*/ #ifdef DEBUG -#if (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) && defined(PIPE_CC_GCC) -#define debug_break() __asm("int3") -#elif defined(PIPE_CC_MSVC) -#define debug_break() __debugbreak() -#else -void debug_break(void); -#endif +#define debug_break() os_break() #else /* !DEBUG */ #define debug_break() ((void)0) #endif /* !DEBUG */ @@ -328,22 +304,6 @@ debug_get_flags_option(const char *name, unsigned long dfault); -void * -debug_malloc(const char *file, unsigned line, const char *function, - size_t size); - -void -debug_free(const char *file, unsigned line, const char *function, - void *ptr); - -void * -debug_calloc(const char *file, unsigned line, const char *function, - size_t count, size_t size ); - -void * -debug_realloc(const char *file, unsigned line, const char *function, - void *old_ptr, size_t old_size, size_t new_size ); - unsigned long debug_memory_begin(void); @@ -354,6 +314,8 @@ debug_memory_end(unsigned long beginning); #ifdef DEBUG struct pipe_surface; struct pipe_transfer; +struct pipe_texture; + void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, unsigned width, unsigned height, @@ -361,6 +323,8 @@ void debug_dump_image(const char *prefix, const void *data); void debug_dump_surface(const char *prefix, struct pipe_surface *surface); +void debug_dump_texture(const char *prefix, + struct pipe_texture *texture); void debug_dump_surface_bmp(const char *filename, struct pipe_surface *surface); void debug_dump_transfer_bmp(const char *filename, diff --git a/src/gallium/auxiliary/util/u_debug_memory.c b/src/gallium/auxiliary/util/u_debug_memory.c index d6484f4ad51..f1baa62f894 100644 --- a/src/gallium/auxiliary/util/u_debug_memory.c +++ b/src/gallium/auxiliary/util/u_debug_memory.c @@ -34,15 +34,10 @@ #include "pipe/p_config.h" -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) -#include <windows.h> -#include <winddi.h> -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) -#include <wdm.h> -#else -#include <stdio.h> -#include 
<stdlib.h> -#endif +#define DEBUG_MEMORY_IMPLEMENTATION + +#include "os/os_memory.h" +#include "os/os_memory_debug.h" #include "util/u_debug.h" #include "util/u_debug_stack.h" @@ -53,18 +48,6 @@ #define DEBUG_MEMORY_STACK 0 /* XXX: disabled until we have symbol lookup */ -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE) -#define real_malloc(_size) EngAllocMem(0, _size, 'D3AG') -#define real_free(_ptr) EngFreeMem(_ptr) -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) -#define real_malloc(_size) ExAllocatePool(0, _size) -#define real_free(_ptr) ExFreePool(_ptr) -#else -#define real_malloc(_size) malloc(_size) -#define real_free(_ptr) free(_ptr) -#endif - - struct debug_memory_header { struct list_head head; @@ -127,7 +110,7 @@ debug_malloc(const char *file, unsigned line, const char *function, struct debug_memory_header *hdr; struct debug_memory_footer *ftr; - hdr = real_malloc(sizeof(*hdr) + size + sizeof(*ftr)); + hdr = os_malloc(sizeof(*hdr) + size + sizeof(*ftr)); if(!hdr) { debug_printf("%s:%u:%s: out of memory when trying to allocate %lu bytes\n", file, line, function, @@ -185,7 +168,7 @@ debug_free(const char *file, unsigned line, const char *function, hdr->magic = 0; ftr->magic = 0; - real_free(hdr); + os_free(hdr); } void * @@ -232,7 +215,7 @@ debug_realloc(const char *file, unsigned line, const char *function, } /* alloc new */ - new_hdr = real_malloc(sizeof(*new_hdr) + new_size + sizeof(*new_ftr)); + new_hdr = os_malloc(sizeof(*new_hdr) + new_size + sizeof(*new_ftr)); if(!new_hdr) { debug_printf("%s:%u:%s: out of memory when trying to allocate %lu bytes\n", file, line, function, @@ -258,7 +241,7 @@ debug_realloc(const char *file, unsigned line, const char *function, /* free old */ old_hdr->magic = 0; old_ftr->magic = 0; - real_free(old_hdr); + os_free(old_hdr); return new_ptr; } diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index b42b429d4d7..37ed789f955 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ 
b/src/gallium/auxiliary/util/u_dl.c @@ -28,6 +28,7 @@ #include "pipe/p_config.h" +#include "pipe/p_compiler.h" #if defined(PIPE_OS_UNIX) #include <dlfcn.h> diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 4110485fb19..14506e84519 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -28,7 +28,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_draw_quad.h" @@ -61,6 +61,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* tell pipe about the vertex attributes */ for (i = 0; i < num_attribs; i++) { velements[i].src_offset = i * 4 * sizeof(float); + velements[i].instance_divisor = 0; velements[i].vertex_buffer_index = 0; velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; velements[i].nr_components = 4; diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 9f16b42944e..01f7931aed1 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -62,10 +62,10 @@ PIPE_FORMAT_R16G16_SSCALED , array , 1, 1, s16 , s16 , , , xy01, PIPE_FORMAT_R16G16B16_SSCALED , array , 1, 1, s16 , s16 , s16 , , xyz1, rgb PIPE_FORMAT_R16G16B16A16_SSCALED , array , 1, 1, s16 , s16 , s16 , s16 , xyzw, rgb PIPE_FORMAT_R8_UNORM , array , 1, 1, un8 , , , , x001, rgb -PIPE_FORMAT_R8G8_UNORM , array , 1, 1, un8 , un8 , , , xy01, rgb -PIPE_FORMAT_R8G8B8_UNORM , array , 1, 1, un8 , un8 , un8 , , xyz1, rgb -PIPE_FORMAT_R8G8B8A8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb -PIPE_FORMAT_R8G8B8X8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , xyz1, rgb +PIPE_FORMAT_R8G8_UNORM , array , 1, 1, un8 , un8 , , , yx01, rgb +PIPE_FORMAT_R8G8B8_UNORM , array , 1, 1, un8 , un8 , un8 , , zyx1, rgb +PIPE_FORMAT_R8G8B8A8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb +PIPE_FORMAT_R8G8B8X8_UNORM , array , 1, 1, un8 , 
un8 , un8 , un8 , wzy1, rgb PIPE_FORMAT_R8_USCALED , array , 1, 1, u8 , , , , x001, rgb PIPE_FORMAT_R8G8_USCALED , array , 1, 1, u8 , u8 , , , xy01, rgb PIPE_FORMAT_R8G8B8_USCALED , array , 1, 1, u8 , u8 , u8 , , xyz1, rgb diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index a558923b2ed..4323bc881bd 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -31,6 +31,7 @@ #include "pipe/p_format.h" +#include "util/u_debug.h" #ifdef __cplusplus extern "C" { diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 76023794dcd..4e358d3938c 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -37,7 +37,7 @@ #include "pipe/p_context.h" #include "util/u_debug.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_state.h" @@ -1287,7 +1287,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, /* disabled blending/masking */ memset(&ctx->blend, 0, sizeof(ctx->blend)); - ctx->blend.colormask = PIPE_MASK_RGBA; + ctx->blend.rt[0].colormask = PIPE_MASK_RGBA; /* no-op depth/stencil/alpha */ memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil)); @@ -1411,8 +1411,8 @@ set_vertex_data(struct gen_mipmap_state *ctx, offset = get_next_slot( ctx ); - pipe_buffer_write(ctx->pipe->screen, ctx->vbuf, - offset, sizeof(ctx->vertices), ctx->vertices); + pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf, + offset, sizeof(ctx->vertices), ctx->vertices); return offset; } diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 5fbd62a03d2..e95d58ea863 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -25,12 +25,15 @@ * **************************************************************************/ -#ifndef P_INLINES_H -#define 
P_INLINES_H +#ifndef U_INLINES_H +#define U_INLINES_H -#include "p_context.h" -#include "p_defines.h" -#include "p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_screen.h" +#include "util/u_debug.h" +#include "util/u_atomic.h" #ifdef __cplusplus @@ -38,7 +41,84 @@ extern "C" { #endif +/* + * Reference counting helper functions. + */ + + +static INLINE void +pipe_reference_init(struct pipe_reference *reference, unsigned count) +{ + p_atomic_set(&reference->count, count); +} + +static INLINE boolean +pipe_is_referenced(struct pipe_reference *reference) +{ + return p_atomic_read(&reference->count) != 0; +} + /** + * Update reference counting. + * The old thing pointed to, if any, will be unreferenced. + * Both 'ptr' and 'reference' may be NULL. + * \return TRUE if the object's refcount hits zero and should be destroyed. + */ +static INLINE boolean +pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference) +{ + boolean destroy = FALSE; + + if(ptr != reference) { + /* bump the reference.count first */ + if (reference) { + assert(pipe_is_referenced(reference)); + p_atomic_inc(&reference->count); + } + + if (ptr) { + assert(pipe_is_referenced(ptr)); + if (p_atomic_dec_zero(&ptr->count)) { + destroy = TRUE; + } + } + } + + return destroy; +} + +static INLINE void +pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf) +{ + struct pipe_buffer *old_buf = *ptr; + + if (pipe_reference(&(*ptr)->reference, &buf->reference)) + old_buf->screen->buffer_destroy(old_buf); + *ptr = buf; +} + +static INLINE void +pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) +{ + struct pipe_surface *old_surf = *ptr; + + if (pipe_reference(&(*ptr)->reference, &surf->reference)) + old_surf->texture->screen->tex_surface_destroy(old_surf); + *ptr = surf; +} + +static INLINE void +pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *tex) +{ + struct 
pipe_texture *old_tex = *ptr; + + if (pipe_reference(&(*ptr)->reference, &tex->reference)) + old_tex->screen->texture_destroy(old_tex); + *ptr = tex; +} + + +/* * Convenience wrappers for screen buffer functions. */ @@ -63,13 +143,6 @@ pipe_buffer_map(struct pipe_screen *screen, if(screen->buffer_map_range) { unsigned offset = 0; unsigned length = buf->size; - - /* XXX: Actually we should be using/detecting DISCARD - * instead of assuming that WRITE implies discard */ - if((usage & PIPE_BUFFER_USAGE_CPU_WRITE) && - !(usage & PIPE_BUFFER_USAGE_DISCARD)) - usage |= PIPE_BUFFER_USAGE_CPU_READ; - return screen->buffer_map_range(screen, buf, offset, length, usage); } else @@ -126,7 +199,39 @@ pipe_buffer_write(struct pipe_screen *screen, map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | + PIPE_BUFFER_USAGE_DISCARD); + assert(map); + if(map) { + memcpy((uint8_t *)map + offset, data, size); + pipe_buffer_flush_mapped_range(screen, buf, offset, size); + pipe_buffer_unmap(screen, buf); + } +} + +/** + * Special case for writing non-overlapping ranges. + * + * We can avoid GPU/CPU synchronization when writing range that has never + * been written before. 
+ */ +static INLINE void +pipe_buffer_write_nooverlap(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned size, + const void *data) +{ + void *map; + + assert(offset < buf->size); + assert(offset + size <= buf->size); + assert(size); + + map = pipe_buffer_map_range(screen, buf, offset, size, + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | + PIPE_BUFFER_USAGE_DISCARD | + PIPE_BUFFER_USAGE_UNSYNCHRONIZED); assert(map); if(map) { memcpy((uint8_t *)map + offset, data, size); @@ -196,4 +301,4 @@ pipe_transfer_buffer_flags( struct pipe_transfer *transf ) } #endif -#endif /* P_INLINES_H */ +#endif /* U_INLINES_H */ diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c index c4b9eb3d9b7..e161ccd88eb 100644 --- a/src/gallium/auxiliary/util/u_keymap.c +++ b/src/gallium/auxiliary/util/u_keymap.c @@ -36,7 +36,6 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "pipe/p_defines.h" #include "cso_cache/cso_hash.h" diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index c3f8c918338..a2fc5973565 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -26,7 +26,7 @@ **************************************************************************/ -/** +/* * Memory functions */ @@ -37,6 +37,7 @@ #include "util/u_pointer.h" #include "util/u_debug.h" +#include "os/os_memory.h" #ifdef __cplusplus @@ -44,114 +45,13 @@ extern "C" { #endif -/* Define ENOMEM for WINCE */ -#if (_WIN32_WCE < 600) -#ifndef ENOMEM -#define ENOMEM 12 -#endif -#endif - - -#if defined(PIPE_OS_WINDOWS) && defined(DEBUG) - -/* memory debugging */ - -#include "util/u_debug.h" - -#define MALLOC( _size ) \ - debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) -#define CALLOC( _count, _size ) \ - debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) -#define FREE( _ptr ) \ - debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) 
-#define REALLOC( _ptr, _old_size, _size ) \ - debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -void * __stdcall -EngAllocMem( - unsigned long Flags, - unsigned long MemSize, - unsigned long Tag ); - -void __stdcall -EngFreeMem( - void *Mem ); - -#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' ) -#define _FREE( _ptr ) EngFreeMem( _ptr ) - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - -void * -ExAllocatePool( - unsigned long PoolType, - size_t NumberOfBytes); - -void -ExFreePool(void *P); - -#define MALLOC(_size) ExAllocatePool(0, _size) -#define _FREE(_ptr) ExFreePool(_ptr) - -#else - -#define MALLOC( SIZE ) malloc( SIZE ) -#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) -#define FREE( PTR ) free( PTR ) - -static INLINE void * -_REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) -{ - (void) old_size; - return realloc(old_ptr, new_size); -} -#define REALLOC( a, b, c ) _REALLOC( a, b, c ) -#endif - - -#ifndef CALLOC -static INLINE void * -CALLOC( unsigned count, unsigned size ) -{ - void *ptr = MALLOC( count * size ); - if( ptr ) { - memset( ptr, 0, count * size ); - } - return ptr; -} -#endif /* !CALLOC */ +#define MALLOC(_size) os_malloc(_size) -#ifndef FREE -static INLINE void -FREE( void *ptr ) -{ - if( ptr ) { - _FREE( ptr ); - } -} -#endif /* !FREE */ +#define CALLOC(_count, _size) os_calloc(_count, _size) -#ifndef REALLOC -static INLINE void * -REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) -{ - void *new_ptr = NULL; - - if (new_size != 0) { - unsigned copy_size = old_size < new_size ? 
old_size : new_size; - new_ptr = MALLOC( new_size ); - if (new_ptr && old_ptr && copy_size) { - memcpy( new_ptr, old_ptr, copy_size ); - } - } - - FREE( old_ptr ); - return new_ptr; -} -#endif /* !REALLOC */ +#define FREE(_ptr ) os_free(_ptr) +#define REALLOC(_ptr, _old_size, _size) os_realloc(_ptr, _old_size, _size) #define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) @@ -160,50 +60,8 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define CALLOC_VARIANT_LENGTH_STRUCT(T,more_size) ((struct T *) CALLOC(1, sizeof(struct T) + more_size)) -/** - * Return memory on given byte alignment - */ -static INLINE void * -align_malloc(size_t bytes, uint alignment) -{ -#if defined(HAVE_POSIX_MEMALIGN) - void *mem; - alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1); - if(posix_memalign(& mem, alignment, bytes) != 0) - return NULL; - return mem; -#else - char *ptr, *buf; - - assert( alignment > 0 ); - - ptr = (char *) MALLOC(bytes + alignment + sizeof(void *)); - if (!ptr) - return NULL; - - buf = (char *) align_pointer( ptr + sizeof(void *), alignment ); - *(char **)(buf - sizeof(void *)) = ptr; - - return buf; -#endif /* defined(HAVE_POSIX_MEMALIGN) */ -} - -/** - * Free memory returned by align_malloc(). 
- */ -static INLINE void -align_free(void *ptr) -{ -#if defined(HAVE_POSIX_MEMALIGN) - FREE(ptr); -#else - if (ptr) { - void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); - void *realAddr = *cubbyHole; - FREE(realAddr); - } -#endif /* defined(HAVE_POSIX_MEMALIGN) */ -} +#define align_malloc(_size, _alignment) os_malloc_aligned(_size, _alignment) +#define align_free(_ptr) os_free_aligned(_ptr) /** diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 43eb0153ee7..0ab53c75dd6 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -425,6 +425,8 @@ util_pack_z(enum pipe_format format, double z) if (z == 1.0) return 0xffffffff; return (uint) (z * 0xffffffff); + case PIPE_FORMAT_Z32_FLOAT: + return (uint)z; case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: if (z == 1.0) diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 10a874f3416..64390e13851 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -30,12 +30,13 @@ #define U_BLIT_H +#include "pipe/p_defines.h" +#include "util/u_debug.h" + #ifdef __cplusplus extern "C" { #endif -#include "pipe/p_defines.h" - static INLINE boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr ) { boolean ok = TRUE; diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c b/src/gallium/auxiliary/util/u_ringbuffer.c new file mode 100644 index 00000000000..648b105b137 --- /dev/null +++ b/src/gallium/auxiliary/util/u_ringbuffer.c @@ -0,0 +1,160 @@ + +#include "os/os_thread.h" +#include "pipe/p_defines.h" +#include "util/u_ringbuffer.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +/* Generic ringbuffer: + */ +struct util_ringbuffer +{ + struct util_packet *buf; + unsigned mask; + + /* Can this be done with atomic variables?? 
+ */ + unsigned head; + unsigned tail; + pipe_condvar change; + pipe_mutex mutex; +}; + + +struct util_ringbuffer *util_ringbuffer_create( unsigned dwords ) +{ + struct util_ringbuffer *ring = CALLOC_STRUCT(util_ringbuffer); + if (ring == NULL) + return NULL; + + assert(util_is_power_of_two(dwords)); + + ring->buf = MALLOC( dwords * sizeof(unsigned) ); + if (ring->buf == NULL) + goto fail; + + ring->mask = dwords - 1; + + pipe_condvar_init(ring->change); + pipe_mutex_init(ring->mutex); + return ring; + +fail: + FREE(ring->buf); + FREE(ring); + return NULL; +} + +void util_ringbuffer_destroy( struct util_ringbuffer *ring ) +{ + pipe_condvar_destroy(ring->change); + pipe_mutex_destroy(ring->mutex); + FREE(ring->buf); + FREE(ring); +} + +/** + * Return number of free entries in the ring + */ +static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring ) +{ + return (ring->tail - (ring->head + 1)) & ring->mask; +} + +/** + * Is the ring buffer empty? + */ +static INLINE boolean util_ringbuffer_empty( const struct util_ringbuffer *ring ) +{ + return util_ringbuffer_space(ring) == ring->mask; +} + +void util_ringbuffer_enqueue( struct util_ringbuffer *ring, + const struct util_packet *packet ) +{ + unsigned i; + + /* XXX: over-reliance on mutexes, etc: + */ + pipe_mutex_lock(ring->mutex); + + /* make sure we don't request an impossible amount of space + */ + assert(packet->dwords <= ring->mask); + + /* Wait for free space: + */ + while (util_ringbuffer_space(ring) < packet->dwords) + pipe_condvar_wait(ring->change, ring->mutex); + + /* Copy data to ring: + */ + for (i = 0; i < packet->dwords; i++) { + + /* Copy all dwords of the packet. 
Note we're abusing the + * typesystem a little - we're being passed a pointer to + * something, but probably not an array of packet structs: + */ + ring->buf[ring->head] = packet[i]; + ring->head++; + ring->head &= ring->mask; + } + + /* Signal change: + */ + pipe_condvar_signal(ring->change); + pipe_mutex_unlock(ring->mutex); +} + +enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, + struct util_packet *packet, + unsigned max_dwords, + boolean wait ) +{ + const struct util_packet *ring_packet; + unsigned i; + int ret = PIPE_OK; + + /* XXX: over-reliance on mutexes, etc: + */ + pipe_mutex_lock(ring->mutex); + + /* Get next ring entry: + */ + if (wait) { + while (util_ringbuffer_empty(ring)) + pipe_condvar_wait(ring->change, ring->mutex); + } + else { + if (util_ringbuffer_empty(ring)) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + } + + ring_packet = &ring->buf[ring->tail]; + + /* Both of these are considered bugs. Raise an assert on debug builds. + */ + if (ring_packet->dwords > ring->mask + 1 - util_ringbuffer_space(ring) || + ring_packet->dwords > max_dwords) { + assert(0); + ret = PIPE_ERROR_BAD_INPUT; + goto out; + } + + /* Copy data from ring: + */ + for (i = 0; i < ring_packet->dwords; i++) { + packet[i] = ring->buf[ring->tail]; + ring->tail++; + ring->tail &= ring->mask; + } + +out: + /* Signal change: + */ + pipe_condvar_signal(ring->change); + pipe_mutex_unlock(ring->mutex); + return ret; +} diff --git a/src/gallium/auxiliary/util/u_ringbuffer.h b/src/gallium/auxiliary/util/u_ringbuffer.h new file mode 100644 index 00000000000..85f0ad6c1f6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_ringbuffer.h @@ -0,0 +1,29 @@ + +#ifndef UTIL_RINGBUFFER_H +#define UTIL_RINGBUFFER_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" /* only for pipe_error! 
*/ + +/* Generic header + */ +struct util_packet { + unsigned dwords:8; + unsigned data24:24; +}; + +struct util_ringbuffer; + +struct util_ringbuffer *util_ringbuffer_create( unsigned dwords ); + +void util_ringbuffer_destroy( struct util_ringbuffer *ring ); + +void util_ringbuffer_enqueue( struct util_ringbuffer *ring, + const struct util_packet *packet ); + +enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, + struct util_packet *packet, + unsigned max_dwords, + boolean wait ); + +#endif diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c index 52382990155..53f3c16dbcc 100644 --- a/src/gallium/auxiliary/util/u_simple_screen.c +++ b/src/gallium/auxiliary/util/u_simple_screen.c @@ -29,7 +29,7 @@ #include "pipe/p_screen.h" #include "pipe/p_state.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" static struct pipe_buffer * diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h index 6612a8a7c09..bb3f5ba102f 100644 --- a/src/gallium/auxiliary/util/u_simple_screen.h +++ b/src/gallium/auxiliary/util/u_simple_screen.h @@ -28,8 +28,145 @@ #ifndef U_SIMPLE_SCREEN_H #define U_SIMPLE_SCREEN_H +#include "pipe/p_format.h" + struct pipe_screen; -struct pipe_winsys; +struct pipe_fence_handle; +struct pipe_surface; +struct pipe_buffer; + +/** + * Gallium3D drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. 
+ */ +struct pipe_winsys +{ + void (*destroy)( struct pipe_winsys *ws ); + + /** Returns name of this winsys interface */ + const char *(*get_name)( struct pipe_winsys *ws ); + + /** + * Do any special operations to ensure buffer size is correct + */ + void (*update_buffer)( struct pipe_winsys *ws, + void *context_private ); + /** + * Do any special operations to ensure frontbuffer contents are + * displayed, eg copy fake frontbuffer. + */ + void (*flush_frontbuffer)( struct pipe_winsys *ws, + struct pipe_surface *surf, + void *context_private ); + + + /** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * Remember that gallium gets to choose the interface it needs, and the + * window systems must then implement that interface (rather than the + * other way around...). + * + * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This + * usage argument is only an optimization hint, not a guarantee, therefore + * proper behavior must be observed in all circumstances. + * + * alignment indicates the client's alignment requirements, eg for + * SSE instructions. + */ + struct pipe_buffer *(*buffer_create)( struct pipe_winsys *ws, + unsigned alignment, + unsigned usage, + unsigned size ); + + /** + * Create a buffer that wraps user-space data. + * + * Effectively this schedules a delayed call to buffer_create + * followed by an upload of the data at *some point in the future*, + * or perhaps never. Basically the allocate/upload is delayed + * until the buffer is actually passed to hardware. + * + * The intention is to provide a quick way to turn regular data + * into a buffer, and secondly to avoid a copy operation if that + * data subsequently turns out to be only accessed by the CPU. + * + * Common example is OpenGL vertex buffers that are subsequently + * processed either by software TNL in the driver or by passing to + * hardware. + * + * XXX: What happens if the delayed call to buffer_create() fails? 
+ * + * Note that ptr may be accessed at any time upto the time when the + * buffer is destroyed, so the data must not be freed before then. + */ + struct pipe_buffer *(*user_buffer_create)(struct pipe_winsys *ws, + void *ptr, + unsigned bytes); + + /** + * Allocate storage for a display target surface. + * + * Often surfaces which are meant to be blitted to the front screen (i.e., + * display targets) must be allocated with special characteristics, memory + * pools, or obtained directly from the windowing system. + * + * This callback is invoked by the pipe_screenwhen creating a texture marked + * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying + * buffer storage. + */ + struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned tex_usage, + unsigned *stride); + + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. + */ + void *(*buffer_map)( struct pipe_winsys *ws, + struct pipe_buffer *buf, + unsigned usage ); + + void (*buffer_unmap)( struct pipe_winsys *ws, + struct pipe_buffer *buf ); + + void (*buffer_destroy)( struct pipe_buffer *buf ); + + + /** Set ptr = fence, with reference counting */ + void (*fence_reference)( struct pipe_winsys *ws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence ); + + /** + * Checks whether the fence has been signalled. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_signalled)( struct pipe_winsys *ws, + struct pipe_fence_handle *fence, + unsigned flag ); + + /** + * Wait for the fence to finish. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_finish)( struct pipe_winsys *ws, + struct pipe_fence_handle *fence, + unsigned flag ); + +}; /** * The following function initializes a simple passthrough screen. 
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index b751e29ab62..019dda767d0 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -38,6 +38,7 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" #include "util/u_simple_shaders.h" +#include "util/u_debug.h" #include "tgsi/tgsi_ureg.h" diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 35c49782043..c9f1c9c210f 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -35,8 +35,9 @@ #include "pipe/p_screen.h" #include "pipe/p_state.h" #include "pipe/p_defines.h" +#include "util/u_inlines.h" -#include "util/u_format.h" +#include "util/u_memory.h" #include "util/u_surface.h" @@ -111,3 +112,73 @@ util_destroy_rgba_surface(struct pipe_texture *texture, pipe_texture_reference(&texture, NULL); } + + +/** + * Compare pipe_framebuffer_state objects. + * \return TRUE if same, FALSE if different + */ +boolean +util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + unsigned i; + + if (dst->width != src->width || + dst->height != src->height) + return FALSE; + + for (i = 0; i < Elements(src->cbufs); i++) { + if (dst->cbufs[i] != src->cbufs[i]) { + return FALSE; + } + } + + if (dst->nr_cbufs != src->nr_cbufs) { + return FALSE; + } + + if (dst->zsbuf != src->zsbuf) { + return FALSE; + } + + return TRUE; +} + + +/** + * Copy framebuffer state from src to dst, updating refcounts. 
+ */ +void +util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + unsigned i; + + dst->width = src->width; + dst->height = src->height; + + for (i = 0; i < Elements(src->cbufs); i++) { + pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); + } + + dst->nr_cbufs = src->nr_cbufs; + + pipe_surface_reference(&dst->zsbuf, src->zsbuf); +} + + +void +util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb) +{ + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + + pipe_surface_reference(&fb->zsbuf, NULL); + + fb->width = fb->height = 0; + fb->nr_cbufs = 0; +} diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h index ce84ed7ad06..3c60df2c3e5 100644 --- a/src/gallium/auxiliary/util/u_surface.h +++ b/src/gallium/auxiliary/util/u_surface.h @@ -30,11 +30,7 @@ #include "pipe/p_compiler.h" - - -struct pipe_screen; -struct pipe_texture; -struct pipe_surface; +#include "pipe/p_state.h" /** @@ -66,4 +62,17 @@ util_destroy_rgba_surface(struct pipe_texture *texture, struct pipe_surface *surface); +extern boolean +util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src); + +extern void +util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src); + + +extern void +util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb); + + #endif /* U_SURFACE_H */ diff --git a/src/gallium/auxiliary/util/u_texture.c b/src/gallium/auxiliary/util/u_texture.c index cd477ab640f..d97e57a7903 100644 --- a/src/gallium/auxiliary/util/u_texture.c +++ b/src/gallium/auxiliary/util/u_texture.c @@ -37,6 +37,7 @@ #include "pipe/p_defines.h" +#include "util/u_debug.h" #include "util/u_texture.h" void util_map_texcoords2d_onto_cubemap(unsigned face, diff --git a/src/gallium/auxiliary/util/u_tile.c 
b/src/gallium/auxiliary/util/u_tile.c index 1ba82bb21f0..0051258e22a 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -32,7 +32,7 @@ #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" @@ -390,7 +390,7 @@ a4r4g4b4_put_tile_rgba(ushort *dst, g >>= 4; b >>= 4; a >>= 4; - *dst++ = (a << 12) | (r << 16) | (g << 4) | b; + *dst++ = (a << 12) | (r << 8) | (g << 4) | b; } p += src_stride; } @@ -1357,7 +1357,10 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); + util_format_write_4f(format, + p, src_stride * sizeof(float), + packed, util_format_get_stride(format, w), + 0, 0, w, h); } pipe_put_tile_raw(pt, x, y, w, h, packed, 0); diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c deleted file mode 100644 index b958a986353..00000000000 --- a/src/gallium/auxiliary/util/u_time.c +++ /dev/null @@ -1,225 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * OS independent time-manipulation functions. - * - * @author Jose Fonseca <[email protected]> - */ - - -#include "pipe/p_config.h" - -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) -#include <sys/time.h> -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) -#include <windows.h> -#include <winddi.h> -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) -#include <windows.h> -extern VOID KeQuerySystemTime(PLARGE_INTEGER); -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) -#include <windows.h> -#else -#error Unsupported OS -#endif - -#include "util/u_time.h" - - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - -static int64_t frequency = 0; - -static INLINE void -util_time_get_frequency(void) -{ - if(!frequency) { -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - LONGLONG temp; - EngQueryPerformanceFrequency(&temp); - frequency = temp; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - LARGE_INTEGER temp; - QueryPerformanceFrequency(&temp); - frequency = temp.QuadPart; -#endif - } -} -#endif - - -void -util_time_get(struct util_time *t) -{ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - 
gettimeofday(&t->tv, NULL); -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - LONGLONG temp; - EngQueryPerformanceCounter(&temp); - t->counter = temp; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - /* Updated every 10 miliseconds, measured in units of 100 nanoseconds. - * http://msdn.microsoft.com/en-us/library/ms801642.aspx */ - LARGE_INTEGER temp; - KeQuerySystemTime(&temp); - t->counter = temp.QuadPart; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - LARGE_INTEGER temp; - QueryPerformanceCounter(&temp); - t->counter = temp.QuadPart; -#endif -} - - -void -util_time_add(const struct util_time *t1, - int64_t usecs, - struct util_time *t2) -{ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; - t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - util_time_get_frequency(); - t2->counter = t1->counter + (usecs * frequency + INT64_C(999999))/INT64_C(1000000); -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - /* 1 tick = 100 nano seconds. 
*/ - t2->counter = t1->counter + usecs * 10; -#else - LARGE_INTEGER temp; - LONGLONG freq; - freq = temp.QuadPart; - t2->counter = t1->counter + (usecs * freq)/1000000L; -#endif -} - - -int64_t -util_time_diff(const struct util_time *t1, - const struct util_time *t2) -{ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - return (t2->tv.tv_usec - t1->tv.tv_usec) + - (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - util_time_get_frequency(); - return (t2->counter - t1->counter)*INT64_C(1000000)/frequency; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - return (t2->counter - t1->counter)/10; -#endif -} - - - -uint64_t -util_time_micros( void ) -{ - struct util_time t1; - - util_time_get(&t1); - -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - util_time_get_frequency(); - return t1.counter*INT64_C(1000000)/frequency; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - return t1.counter/10; -#endif -} - - - -/** - * Compare two time values. - * - * Not publicly available because it does not take in account wrap-arounds. - * Use util_time_timeout instead. 
- */ -static INLINE int -util_time_compare(const struct util_time *t1, - const struct util_time *t2) -{ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - if (t1->tv.tv_sec < t2->tv.tv_sec) - return -1; - else if(t1->tv.tv_sec > t2->tv.tv_sec) - return 1; - else if (t1->tv.tv_usec < t2->tv.tv_usec) - return -1; - else if(t1->tv.tv_usec > t2->tv.tv_usec) - return 1; - else - return 0; -#elif defined(PIPE_OS_WINDOWS) - if (t1->counter < t2->counter) - return -1; - else if(t1->counter > t2->counter) - return 1; - else - return 0; -#endif -} - - -boolean -util_time_timeout(const struct util_time *start, - const struct util_time *end, - const struct util_time *curr) -{ - if(util_time_compare(start, end) <= 0) - return !(util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0); - else - return !(util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0); -} - - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) -void util_time_sleep(unsigned usecs) -{ - LONGLONG start, curr, end; - - EngQueryPerformanceCounter(&start); - - if(!frequency) - EngQueryPerformanceFrequency(&frequency); - - end = start + (usecs * frequency + 999999LL)/1000000LL; - - do { - EngQueryPerformanceCounter(&curr); - } while(start <= curr && curr < end || - end < start && (curr < end || start <= curr)); -} -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) -void util_time_sleep(unsigned usecs) -{ - Sleep((usecs + 999)/ 1000); -} -#endif diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index a6189a247bb..15899c2c884 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -38,15 +38,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) -#include <time.h> /* timeval */ -#include <unistd.h> /* usleep */ -#endif - -#if defined(PIPE_OS_HAIKU) -#include 
<sys/time.h> /* timeval */ -#include <unistd.h> -#endif +#include "os/os_time.h" #include "pipe/p_compiler.h" @@ -63,43 +55,92 @@ extern "C" { */ struct util_time { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) - struct timeval tv; -#else int64_t counter; -#endif }; -void -util_time_get(struct util_time *t); +PIPE_DEPRECATED +static INLINE void +util_time_get(struct util_time *t) +{ + t->counter = os_time_get(); +} + -void +/** + * Return t2 = t1 + usecs + */ +PIPE_DEPRECATED +static INLINE void util_time_add(const struct util_time *t1, int64_t usecs, - struct util_time *t2); + struct util_time *t2) +{ + t2->counter = t1->counter + usecs; +} -uint64_t -util_time_micros( void ); -int64_t +/** + * Return difference between times, in microseconds + */ +PIPE_DEPRECATED +static INLINE int64_t util_time_diff(const struct util_time *t1, - const struct util_time *t2); + const struct util_time *t2) +{ + return t2->counter - t1->counter; +} + + +/** + * Compare two time values. + * + * Not publicly available because it does not take in account wrap-arounds. + * Use util_time_timeout instead. + */ +static INLINE int +_util_time_compare(const struct util_time *t1, + const struct util_time *t2) +{ + if (t1->counter < t2->counter) + return -1; + else if(t1->counter > t2->counter) + return 1; + else + return 0; +} + /** * Returns non-zero when the timeout expires. 
*/ -boolean +PIPE_DEPRECATED +static INLINE boolean util_time_timeout(const struct util_time *start, const struct util_time *end, - const struct util_time *curr); + const struct util_time *curr) +{ + return os_time_timeout(start->counter, end->counter, curr->counter); +} -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) -#define util_time_sleep usleep -#else -void -util_time_sleep(unsigned usecs); -#endif + +/** + * Return current time in microseconds + */ +PIPE_DEPRECATED +static INLINE int64_t +util_time_micros(void) +{ + return os_time_get(); +} + + +PIPE_DEPRECATED +static INLINE void +util_time_sleep(int64_t usecs) +{ + os_time_sleep(usecs); +} #ifdef __cplusplus diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c index 178acdca4df..59bdcd2c451 100644 --- a/src/gallium/auxiliary/util/u_timed_winsys.c +++ b/src/gallium/auxiliary/util/u_timed_winsys.c @@ -30,7 +30,7 @@ */ #include "pipe/p_state.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "u_timed_winsys.h" #include "util/u_memory.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index 975ee89c455..012b2ae2336 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -30,7 +30,7 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_screen.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -85,7 +85,9 @@ my_buffer_write(struct pipe_screen *screen, map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | + PIPE_BUFFER_USAGE_DISCARD | + PIPE_BUFFER_USAGE_UNSYNCHRONIZED); if (map == NULL) return PIPE_ERROR_OUT_OF_MEMORY; diff --git 
a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index fc2a1c59a6b..ba23435f698 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -28,7 +28,7 @@ #include "vl_compositor.h" #include <assert.h> #include <pipe/p_context.h> -#include <pipe/p_inlines.h> +#include <util/u_inlines.h> #include <tgsi/tgsi_parse.h> #include <tgsi/tgsi_build.h> #include <util/u_memory.h> @@ -245,7 +245,6 @@ init_pipe_state(struct vl_compositor *c) sampler.compare_mode = PIPE_TEX_COMPARE_NONE; sampler.compare_func = PIPE_FUNC_ALWAYS; sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ /*sampler.lod_bias = ;*/ /*sampler.min_lod = ;*/ /*sampler.max_lod = ;*/ @@ -316,6 +315,7 @@ init_buffers(struct vl_compositor *c) pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer); c->vertex_elems[0].src_offset = 0; + c->vertex_elems[0].instance_divisor = 0; c->vertex_elems[0].vertex_buffer_index = 0; c->vertex_elems[0].nr_components = 2; c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; @@ -345,6 +345,7 @@ init_buffers(struct vl_compositor *c) pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer); c->vertex_elems[1].src_offset = 0; + c->vertex_elems[1].instance_divisor = 0; c->vertex_elems[1].vertex_buffer_index = 1; c->vertex_elems[1].nr_components = 2; c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; @@ -353,7 +354,7 @@ init_buffers(struct vl_compositor *c) * Create our vertex shader's constant buffer * Const buffer contains scaling and translation vectors */ - c->vs_const_buf.buffer = pipe_buffer_create + c->vs_const_buf = pipe_buffer_create ( c->pipe->screen, 1, @@ -365,7 +366,7 @@ init_buffers(struct vl_compositor *c) * Create our fragment shader's constant buffer * Const buffer contains the color conversion matrix and bias vectors */ - c->fs_const_buf.buffer = pipe_buffer_create + c->fs_const_buf = pipe_buffer_create ( c->pipe->screen, 1, @@ -390,8 +391,8 @@ 
cleanup_buffers(struct vl_compositor *c) for (i = 0; i < 2; ++i) pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL); - pipe_buffer_reference(&c->vs_const_buf.buffer, NULL); - pipe_buffer_reference(&c->fs_const_buf.buffer, NULL); + pipe_buffer_reference(&c->vs_const_buf, NULL); + pipe_buffer_reference(&c->fs_const_buf, NULL); } bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe) @@ -483,13 +484,13 @@ void vl_compositor_render(struct vl_compositor *compositor, compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader); compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs); compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems); - compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf); - compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf); vs_consts = pipe_buffer_map ( compositor->pipe->screen, - compositor->vs_const_buf.buffer, + compositor->vs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); @@ -511,7 +512,7 @@ void vl_compositor_render(struct vl_compositor *compositor, vs_consts->src_trans.z = 0; vs_consts->src_trans.w = 0; - pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer); + pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf); compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence); @@ -525,10 +526,10 @@ void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float memcpy ( - pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf.buffer, 
PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE), mat, sizeof(struct fragment_shader_consts) ); - pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf.buffer); + pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf); } diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index f441901a751..6a9a3fd7af1 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -47,7 +47,7 @@ struct vl_compositor struct pipe_scissor_state scissor; struct pipe_vertex_buffer vertex_bufs[2]; struct pipe_vertex_element vertex_elems[2]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; + struct pipe_buffer *vs_const_buf, *fs_const_buf; }; bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index caf581aca60..f323de0ea55 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -28,7 +28,7 @@ #include "vl_mpeg12_mc_renderer.h" #include <assert.h> #include <pipe/p_context.h> -#include <pipe/p_inlines.h> +#include <util/u_inlines.h> #include <util/u_format.h> #include <util/u_math.h> #include <util/u_memory.h> @@ -762,7 +762,6 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r) sampler.compare_mode = PIPE_TEX_COMPARE_NONE; sampler.compare_func = PIPE_FUNC_ALWAYS; sampler.normalized_coords = 1; - /*sampler.prefilter = ; */ /*sampler.shadow_ambient = ; */ /*sampler.lod_bias = ; */ sampler.min_lod = 0; @@ -891,53 +890,61 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) /* Position element */ r->vertex_elems[0].src_offset = 0; + r->vertex_elems[0].instance_divisor = 0; r->vertex_elems[0].vertex_buffer_index = 0; r->vertex_elems[0].nr_components = 2; r->vertex_elems[0].src_format = 
PIPE_FORMAT_R32G32_FLOAT; /* Luma, texcoord element */ r->vertex_elems[1].src_offset = sizeof(struct vertex2f); + r->vertex_elems[1].instance_divisor = 0; r->vertex_elems[1].vertex_buffer_index = 0; r->vertex_elems[1].nr_components = 2; r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Chroma Cr texcoord element */ r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2; + r->vertex_elems[2].instance_divisor = 0; r->vertex_elems[2].vertex_buffer_index = 0; r->vertex_elems[2].nr_components = 2; r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Chroma Cb texcoord element */ r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3; + r->vertex_elems[3].instance_divisor = 0; r->vertex_elems[3].vertex_buffer_index = 0; r->vertex_elems[3].nr_components = 2; r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; /* First ref surface top field texcoord element */ r->vertex_elems[4].src_offset = 0; + r->vertex_elems[4].instance_divisor = 0; r->vertex_elems[4].vertex_buffer_index = 1; r->vertex_elems[4].nr_components = 2; r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; /* First ref surface bottom field texcoord element */ r->vertex_elems[5].src_offset = sizeof(struct vertex2f); + r->vertex_elems[5].instance_divisor = 0; r->vertex_elems[5].vertex_buffer_index = 1; r->vertex_elems[5].nr_components = 2; r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Second ref surface top field texcoord element */ r->vertex_elems[6].src_offset = 0; + r->vertex_elems[6].instance_divisor = 0; r->vertex_elems[6].vertex_buffer_index = 2; r->vertex_elems[6].nr_components = 2; r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Second ref surface bottom field texcoord element */ r->vertex_elems[7].src_offset = sizeof(struct vertex2f); + r->vertex_elems[7].instance_divisor = 0; r->vertex_elems[7].vertex_buffer_index = 2; r->vertex_elems[7].nr_components = 2; r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; - 
r->vs_const_buf.buffer = pipe_buffer_create + r->vs_const_buf = pipe_buffer_create ( r->pipe->screen, DEFAULT_BUF_ALIGNMENT, @@ -945,7 +952,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) sizeof(struct vertex_shader_consts) ); - r->fs_const_buf.buffer = pipe_buffer_create + r->fs_const_buf = pipe_buffer_create ( r->pipe->screen, DEFAULT_BUF_ALIGNMENT, @@ -954,11 +961,11 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) memcpy ( - pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(r->pipe->screen, r->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE), &fs_consts, sizeof(struct fragment_shader_consts) ); - pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer); + pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf); return true; } @@ -970,8 +977,8 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r) assert(r); - pipe_buffer_reference(&r->vs_const_buf.buffer, NULL); - pipe_buffer_reference(&r->fs_const_buf.buffer, NULL); + pipe_buffer_reference(&r->vs_const_buf, NULL); + pipe_buffer_reference(&r->fs_const_buf, NULL); for (i = 0; i < 3; ++i) pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL); @@ -1284,19 +1291,19 @@ flush(struct vl_mpeg12_mc_renderer *r) vs_consts = pipe_buffer_map ( - r->pipe->screen, r->vs_const_buf.buffer, + r->pipe->screen, r->vs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); vs_consts->denorm.x = r->surface->width0; vs_consts->denorm.y = r->surface->height0; - pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer); + pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf); r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, - &r->vs_const_buf); + r->vs_const_buf); r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0, - &r->fs_const_buf); + r->fs_const_buf); if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) { r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h 
b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h index 64184337a06..f00b8c7b8b1 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -63,8 +63,8 @@ struct vl_mpeg12_mc_renderer struct pipe_viewport_state viewport; struct pipe_scissor_state scissor; - struct pipe_constant_buffer vs_const_buf; - struct pipe_constant_buffer fs_const_buf; + struct pipe_buffer *vs_const_buf; + struct pipe_buffer *fs_const_buf; struct pipe_framebuffer_state fb_state; struct pipe_vertex_element vertex_elems[8]; diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py index 9b0c86babdb..59c19ed98dd 100644 --- a/src/gallium/docs/source/conf.py +++ b/src/gallium/docs/source/conf.py @@ -16,13 +16,13 @@ import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.append(os.path.abspath('.')) +sys.path.append(os.path.abspath('exts')) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.pngmath'] +extensions = ['sphinx.ext.pngmath', 'tgsi'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 21f5f9111a0..a7669575b95 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -33,7 +33,11 @@ This state describes how resources in various flavours (textures, buffers, surfaces) are bound to the driver. -* ``set_constant_buffer`` +* ``set_constant_buffer`` sets a constant buffer to be used for a given shader + type. 
index is used to indicate which buffer to set (some apis may allow + multiple ones to be set, and binding a specific one later, though drivers + are mostly restricted to the first one right now). + * ``set_framebuffer_state`` * ``set_fragment_sampler_textures`` * ``set_vertex_sampler_textures`` @@ -47,11 +51,13 @@ These pieces of state are too small, variable, and/or trivial to have CSO objects. They all follow simple, one-method binding calls, e.g. ``set_edgeflags``. -* ``set_edgeflags`` * ``set_blend_color`` * ``set_clip_state`` * ``set_polygon_stipple`` -* ``set_scissor_state`` +* ``set_scissor_state`` sets the bounds for the scissor test, which culls + pixels before blending to render targets. If the :ref:`Rasterizer` does + not have the scissor test enabled, then the scissor bounds never need to + be set since they will not be used. * ``set_viewport_state`` * ``set_vertex_elements`` @@ -72,12 +78,67 @@ stencil-only clears of packed depth-stencil buffers. Drawing ^^^^^^^ -``draw_arrays`` +``draw_arrays`` draws a specified primitive. + +This command is equivalent to calling ``draw_arrays_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. -``draw_elements`` +``draw_elements`` draws a specified primitive using an optional +index buffer. + +This command is equivalent to calling ``draw_elements_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. ``draw_range_elements`` +XXX: this is (probably) a temporary entrypoint, as the range +information should be available from the vertex_buffer state. +Using this to quickly evaluate a specialized path in the draw +module. + +``draw_arrays_instanced`` draws multiple instances of the same primitive. + +This command is equivalent to calling ``draw_elements_instanced`` +with ``indexBuffer`` set to NULL and ``indexSize`` set to 0. + +``draw_elements_instanced`` draws multiple instances of the same primitive +using an optional index buffer. 
+ +For instanceID in the range between ``startInstance`` +and ``startInstance``+``instanceCount``-1, inclusive, draw a primitive +specified by ``mode`` and sequential numbers in the range between ``start`` +and ``start``+``count``-1, inclusive. + +If ``indexBuffer`` is not NULL, it specifies an index buffer with index +byte size of ``indexSize``. The sequential numbers are used to lookup +the index buffer and the resulting indices in turn are used to fetch +vertex attributes. + +If ``indexBuffer`` is NULL, the sequential numbers are used directly +as indices to fetch vertex attributes. + +If a given vertex element has ``instance_divisor`` set to 0, it is said +it contains per-vertex data and effective vertex attribute address needs +to be recalculated for every index. + + attribAddr = ``stride`` * index + ``src_offset`` + +If a given vertex element has ``instance_divisor`` set to non-zero, +it is said it contains per-instance data and effective vertex attribute +address needs to be recalculated for every ``instance_divisor``-th instance. + + attribAddr = ``stride`` * instanceID / ``instance_divisor`` + ``src_offset`` + +In the above formulas, ``src_offset`` is taken from the given vertex element +and ``stride`` is taken from a vertex buffer associated with the given +vertex element. + +The calculated attribAddr is used as an offset into the vertex buffer to +fetch the attribute data. + +The value of ``instanceID`` can be read in a vertex shader through a system +value register declared with INSTANCEID semantic name. + Queries ^^^^^^^ @@ -87,9 +148,51 @@ draws. Queries may be nested, though no state tracker currently exercises this. Queries can be created with ``create_query`` and deleted with -``destroy_query``. To enable a query, use ``begin_query``, and when finished, -use ``end_query`` to stop the query. Finally, ``get_query_result`` is used -to retrieve the results. +``destroy_query``.
To start a query, use ``begin_query``, and when finished, +use ``end_query`` to end the query. + +``get_query_result`` is used to retrieve the results of a query. If +the ``wait`` parameter is TRUE, then the ``get_query_result`` call +will block until the results of the query are ready (and TRUE will be +returned). Otherwise, if the ``wait`` parameter is FALSE, the call +will not block and the return value will be TRUE if the query has +completed or FALSE otherwise. + +A common type of query is the occlusion query which counts the number of +fragments/pixels which are written to the framebuffer (and not culled by +Z/stencil/alpha testing or shader KILL instructions). + + +Conditional Rendering +^^^^^^^^^^^^^^^^^^^^^ + +A drawing command can be skipped depending on the outcome of a query +(typically an occlusion query). The ``render_condition`` function specifies +the query which should be checked prior to rendering anything. + +If ``render_condition`` is called with ``query`` = NULL, conditional +rendering is disabled and drawing takes place normally. + +If ``render_condition`` is called with a non-null ``query`` subsequent +drawing commands will be predicated on the outcome of the query. If +the query result is zero subsequent drawing commands will be skipped. + +If ``mode`` is PIPE_RENDER_COND_WAIT the driver will wait for the +query to complete before deciding whether to render. + +If ``mode`` is PIPE_RENDER_COND_NO_WAIT and the query has not yet +completed, the drawing command will be executed normally. If the query +has completed, drawing will be predicated on the outcome of the query. + +If ``mode`` is PIPE_RENDER_COND_BY_REGION_WAIT or +PIPE_RENDER_COND_BY_REGION_NO_WAIT rendering will be predicated as above +for the non-REGION modes but in the case that an occlusion query returns +a non-zero result, regions which were occluded may be omitted by subsequent +drawing commands. This can result in better performance with some GPUs.
+Normally, if the occlusion query returned a non-zero result subsequent +drawing happens normally so fragments may be generated, shaded and +processed even where they're known to be obscured. + Flushing ^^^^^^^^ diff --git a/src/gallium/docs/source/cso/blend.rst b/src/gallium/docs/source/cso/blend.rst index fd9e4a1e2d5..55c0f328859 100644 --- a/src/gallium/docs/source/cso/blend.rst +++ b/src/gallium/docs/source/cso/blend.rst @@ -6,9 +6,50 @@ Blend This state controls blending of the final fragments into the target rendering buffers. -XXX it is unresolved what behavior should result if blend_enable is off. +Blend Factors +------------- + +The blend factors largely follow the same pattern as their counterparts +in other modern and legacy drawing APIs. + +XXX blurb about dual-source blends Members ------- -XXX undocumented members +independent_blend_enable + If enabled, blend state is different for each render target, and + for each render target set in the respective member of the rt array. + If disabled, blend state is the same for all render targets, and only + the first member of the rt array contains valid data. +logicop_enable + Enables logic ops. Cannot be enabled at the same time as blending, and + is always the same for all render targets. +logicop_func + The logic operation to use if logic ops are enabled. One of PIPE_LOGICOP. +dither + Whether dithering is enabled. +rt + Contains the per-rendertarget blend state. + +Per-rendertarget Members +------------------------ + +blend_enable + If blending is enabled, perform a blend calculation according to blend + functions and source/destination factors. Otherwise, the incoming fragment + color gets passed unmodified (but colormask still applies). +rgb_func + The blend function to use for rgb channels. One of PIPE_BLEND. +rgb_src_factor + The blend source factor to use for rgb channels. One of PIPE_BLENDFACTOR. +rgb_dst_factor + The blend destination factor to use for rgb channels. One of PIPE_BLENDFACTOR. 
+alpha_func + The blend function to use for the alpha channel. One of PIPE_BLEND. +alpha_src_factor + The blend source factor to use for the alpha channel. One of PIPE_BLENDFACTOR. +alpha_dst_factor + The blend destination factor to use for the alpha channel. One of PIPE_BLENDFACTOR. +colormask + Bitmask of which channels to write. Combination of PIPE_MASK bits. diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst index 00d65fc598a..24cc78c68de 100644 --- a/src/gallium/docs/source/cso/rasterizer.rst +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -7,32 +7,69 @@ The rasterizer state controls the rendering of points, lines and triangles. Attributes include polygon culling state, line width, line stipple, multisample state, scissoring and flat/smooth shading. - Members ------- +bypass_vs_clip_and_viewport +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Whether the entire TCL pipeline should be bypassed. This implies that +vertices are pre-transformed for the viewport, and will not be run +through the vertex shader. + +.. note:: + + Implementations may still clip away vertices that are not in the viewport + when this is set. + flatshade - If set, the provoking vertex of each polygon is used to determine the - color of the entire polygon. If not set, fragment colors will be - interpolated between the vertex colors. - Note that this is separate from the fragment shader input attributes - CONSTANT, LINEAR and PERSPECTIVE. We need the flatshade state at +^^^^^^^^^ + +If set, the provoking vertex of each polygon is used to determine the color +of the entire polygon. If not set, fragment colors will be interpolated +between the vertex colors. + +The actual interpolated shading algorithm is obviously +implementation-dependent, but will usually be Gouraud for most hardware. + +.. note:: + + This is separate from the fragment shader input attributes + CONSTANT, LINEAR and PERSPECTIVE.
The flatshade state is needed at clipping time to determine how to set the color of new vertices. - Also note that the draw module can implement flat shading by copying - the provoking vertex color to all the other vertices in the primitive. + + :ref:`Draw` can implement flat shading by copying the provoking vertex + color to all the other vertices in the primitive. flatshade_first - Whether the first vertex should be the provoking vertex, for most - primitives. If not set, the last vertex is the provoking vertex. +^^^^^^^^^^^^^^^ + +Whether the first vertex should be the provoking vertex, for most primitives. +If not set, the last vertex is the provoking vertex. + +There are several important exceptions to the specification of this rule. + +* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first + vertex. If the caller wishes to change the provoking vertex, they merely + need to rotate the vertices themselves. +* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no + effect; the provoking vertex is always the last vertex. +* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the + second vertex, not the first. This permits each segment of the fan to have + a different color. + +Other Members +^^^^^^^^^^^^^ light_twoside - If set, there are per-vertex back-facing colors. The draw module + If set, there are per-vertex back-facing colors. :ref:`Draw` uses this state along with the front/back information to set the final vertex colors prior to rasterization. front_winding Indicates the window order of front-facing polygons, either PIPE_WINDING_CW or PIPE_WINDING_CCW + cull_mode Indicates which polygons to cull, either PIPE_WINDING_NONE (cull no polygons), PIPE_WINDING_CW (cull clockwise-winding polygons), @@ -68,7 +105,7 @@ line_stipple_enable line_stipple_pattern 16-bit bitfield of on/off flags, used to pattern the line stipple. 
line_stipple_factor - When drawinga stippled line, each bit in the stipple pattern is + When drawing a stippled line, each bit in the stipple pattern is repeated N times, where N = line_stipple_factor + 1. line_last_pixel Controls whether the last pixel in a line is drawn or not. OpenGL @@ -98,7 +135,7 @@ sprite_coord_mode coordinate (0,0,0,1). For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left vertex will have coordinate (0,0,0,1). - This state is needed by the 'draw' module because that's where each + This state is needed by :ref:`Draw` because that's where each point vertex is converted into four quad vertices. There's no other place to emit the new vertex texture coordinates which are required for sprite rendering. @@ -118,45 +155,9 @@ scissor Whether the scissor test is enabled. multisample - Whether :ref:`MSAA` is enabled. - -bypass_vs_clip_and_viewport - Whether the entire TCL pipeline should be bypassed. This implies that - vertices are pre-transformed for the viewport, and will not be run - through the vertex shader. Note that implementations may still clip away - vertices that are not in the viewport. + Whether :term:`MSAA` is enabled. gl_rasterization_rules Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set, the rasterizer will use (0, 0) for pixel centers. - -Notes ------ - -flatshade -^^^^^^^^^ - -The actual interpolated shading algorithm is obviously -implementation-dependent, but will usually be Gourard for most hardware. - -bypass_vs_clip_and_viewport -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When set, this implies that vertices are pre-transformed for the viewport, and -will not be run through the vertex shader. Note that implementations may still -clip away vertices that are not visible. - -flatshade_first -^^^^^^^^^^^^^^^ - -There are several important exceptions to the specification of this rule. - -* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first - vertex. 
If the caller wishes to change the provoking vertex, they merely - need to rotate the vertices themselves. -* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no - effect; the provoking vertex is always the last vertex. -* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the - second vertex, not the first. This permits each segment of the fan to have - a different color. diff --git a/src/gallium/docs/source/cso/sampler.rst b/src/gallium/docs/source/cso/sampler.rst index e3f1757f57a..044ffffcb4f 100644 --- a/src/gallium/docs/source/cso/sampler.rst +++ b/src/gallium/docs/source/cso/sampler.rst @@ -12,8 +12,6 @@ with the traditional (S, T, R, Q) notation. Members ------- -XXX undocumented compare_mode, compare_func - wrap_s How to wrap the S coordinate. One of PIPE_TEX_WRAP. wrap_t @@ -27,12 +25,16 @@ min_mip_filter PIPE_TEX_FILTER. mag_img_filter The filter to use when magnifying texels. One of PIPE_TEX_FILTER. +compare_mode + If set to PIPE_TEX_COMPARE_R_TO_TEXTURE, texture output is computed + according to compare_func, using r coord and the texture value as operands. + If set to PIPE_TEX_COMPARE_NONE, no comparison calculation is performed. +compare_func + How the comparison is computed. One of PIPE_FUNC. normalized_coords Whether the texture coordinates are normalized. If normalized, they will always be in [0, 1]. If not, they will be in the range of each dimension of the loaded texture. -prefilter - XXX From the Doxy, "weird sampling state exposed by some APIs." Refine. lod_bias The bias to apply to the level of detail. min_lod diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst index 33e846e33d2..100afe33972 100644 --- a/src/gallium/docs/source/distro.rst +++ b/src/gallium/docs/source/distro.rst @@ -31,21 +31,6 @@ Wrapper driver. LLVM Softpipe ^^^^^^^^^^^^^ -nVidia nv04 -^^^^^^^^^^^ - -Deprecated. - -nVidia nv10 -^^^^^^^^^^^ - -Deprecated. - -nVidia nv20 -^^^^^^^^^^^ - -Deprecated. 
- nVidia nv30 ^^^^^^^^^^^ @@ -61,10 +46,7 @@ VMWare SVGA ATI r300 ^^^^^^^^ -AMD/ATI r600 -^^^^^^^^^^^^ - -Highly experimental. +Testing-quality. Softpipe ^^^^^^^^ @@ -106,20 +88,50 @@ Xorg XFree86 DDX Auxiliary --------- +OS +^^ + +The OS module contains the abstractions for basic operating system services: + +* memory allocation +* simple message logging +* obtaining run-time configuration option +* threading primitives + +This is the bare minimum required to port Gallium to a new platform. + +The OS module already provides the implementations of these abstractions for +the most common platforms. When targeting an embedded platform no +implementation will be provided -- these must be provided separately. + CSO Cache ^^^^^^^^^ +The CSO cache is used to accelerate preparation of state by saving +driver-specific state structures for later use. + +.. _draw: + Draw ^^^^ +Draw is a software :term:`TCL` pipeline for hardware that lacks vertex shaders +or other essential parts of pre-rasterization vertex preparation. + Gallivm ^^^^^^^ Indices ^^^^^^^ -Pipe Buffer Manager -^^^^^^^^^^^^^^^^^^^ +Indices provides tools for translating or generating element indices for +use with element-based rendering. + +Pipe Buffer Managers +^^^^^^^^^^^^^^^^^^^^ + +Each of these managers provides various services to drivers that are not +fully utilizing a memory manager. Remote Debugger ^^^^^^^^^^^^^^^ @@ -127,12 +139,12 @@ Remote Debugger Runtime Assembly Emission ^^^^^^^^^^^^^^^^^^^^^^^^^ -Surface Context Tracker -^^^^^^^^^^^^^^^^^^^^^^^ - TGSI ^^^^ +The TGSI auxiliary module provides basic utilities for manipulating TGSI +streams. 
+ Translate ^^^^^^^^^ diff --git a/src/gallium/docs/source/exts/tgsi.py b/src/gallium/docs/source/exts/tgsi.py new file mode 100644 index 00000000000..e92cd5c4d1b --- /dev/null +++ b/src/gallium/docs/source/exts/tgsi.py @@ -0,0 +1,17 @@ +# tgsi.py +# Sphinx extension providing formatting for TGSI opcodes +# (c) Corbin Simpson 2010 + +import docutils.nodes +import sphinx.addnodes + +def parse_opcode(env, sig, signode): + opcode, desc = sig.split("-", 1) + opcode = opcode.strip().upper() + desc = " (%s)" % desc.strip() + signode += sphinx.addnodes.desc_name(opcode, opcode) + signode += sphinx.addnodes.desc_annotation(desc, desc) + return opcode + +def setup(app): + app.add_description_unit("opcode", "opcode", "%s (TGSI opcode)", parse_opcode) diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst index 6a9110ce786..0696cb5d277 100644 --- a/src/gallium/docs/source/glossary.rst +++ b/src/gallium/docs/source/glossary.rst @@ -8,3 +8,16 @@ Glossary Multi-Sampled Anti-Aliasing. A basic anti-aliasing technique that takes multiple samples of the depth buffer, and uses this information to smooth the edges of polygons. + + TCL + Transform, Clipping, & Lighting. The three stages of preparation in a + rasterizing pipeline prior to the actual rasterization of vertices into + fragments. + + NPOT + Non-power-of-two. Usually applied to textures which have at least one + dimension which is not a power of two. + + LOD + Level of Detail. Also spelled "LoD." The value that determines when the + switches between mipmaps occur during texture sampling. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 9631e6967ef..27f65522b69 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -3,6 +3,160 @@ Screen A screen is an object representing the context-independent part of a device. +Useful Flags +------------ + +.. 
_pipe_cap: + +PIPE_CAP +^^^^^^^^ + +Pipe capabilities help expose hardware functionality not explicitly required +by Gallium. For floating-point values, use :ref:`get_paramf`, and for boolean +or integer values, use :ref:`get_param`. + +The integer capabilities: + +* ``MAX_TEXTURE_IMAGE_UNITS``: The maximum number of samplers available. +* ``NPOT_TEXTURES``: Whether :term:`NPOT` textures may have repeat modes, + normalized coordinates, and mipmaps. +* ``TWO_SIDED_STENCIL``: Whether the stencil test can also affect back-facing + polygons. +* ``GLSL``: Deprecated. +* ``DUAL_SOURCE_BLEND``: Whether dual-source blend factors are supported. See + :ref:`Blend` for more information. +* ``ANISOTROPIC_FILTER``: Whether textures can be filtered anisotropically. +* ``POINT_SPRITE``: Whether point sprites are available. +* ``MAX_RENDER_TARGETS``: The maximum number of render targets that may be + bound. +* ``OCCLUSION_QUERY``: Whether occlusion queries are available. +* ``TEXTURE_SHADOW_MAP``: XXX +* ``MAX_TEXTURE_2D_LEVELS``: The maximum number of mipmap levels available + for a 2D texture. +* ``MAX_TEXTURE_3D_LEVELS``: The maximum number of mipmap levels available + for a 3D texture. +* ``MAX_TEXTURE_CUBE_LEVELS``: The maximum number of mipmap levels available + for a cubemap. +* ``TEXTURE_MIRROR_CLAMP``: Whether mirrored texture coordinates with clamp + are supported. +* ``TEXTURE_MIRROR_REPEAT``: Whether mirrored repeating texture coordinates + are supported. +* ``MAX_VERTEX_TEXTURE_UNITS``: The maximum number of samplers addressable + inside the vertex shader. If this is 0, then the vertex shader cannot + sample textures. +* ``TGSI_CONT_SUPPORTED``: Whether the TGSI CONT opcode is supported. +* ``BLEND_EQUATION_SEPARATE``: Whether alpha blend equations may be different + from color blend equations, in :ref:`Blend` state. +* ``SM3``: Whether the vertex shader and fragment shader support equivalent + opcodes to the Shader Model 3 specification. 
XXX oh god this is horrible +* ``MAX_PREDICATE_REGISTERS``: XXX +* ``MAX_COMBINED_SAMPLERS``: The total number of samplers accessible from + the vertex and fragment shader, inclusive. +* ``MAX_CONST_BUFFERS``: Maximum number of constant buffers that can be bound + to any shader stage using ``set_constant_buffer``. If 0 or 1, the pipe will + only permit binding one constant buffer per shader, and the shaders will + not permit two-dimensional access to constants. +* ``MAX_CONST_BUFFER_SIZE``: Maximum byte size of a single constant buffer. +* ``INDEP_BLEND_ENABLE``: Whether per-rendertarget blend enabling and channel + masks are supported. If 0, then the first rendertarget's blend mask is + replicated across all MRTs. +* ``INDEP_BLEND_FUNC``: Whether per-rendertarget blend functions are + available. If 0, then the first rendertarget's blend functions affect all + MRTs. +* ``PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT``: Whether the TGSI property + FS_COORD_ORIGIN with value UPPER_LEFT is supported. +* ``PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT``: Whether the TGSI property + FS_COORD_ORIGIN with value LOWER_LEFT is supported. +* ``PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER``: Whether the TGSI + property FS_COORD_PIXEL_CENTER with value HALF_INTEGER is supported. +* ``PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER``: Whether the TGSI + property FS_COORD_PIXEL_CENTER with value INTEGER is supported. + +The floating-point capabilities: + +* ``MAX_LINE_WIDTH``: The maximum width of a regular line. +* ``MAX_LINE_WIDTH_AA``: The maximum width of a smoothed line. +* ``MAX_POINT_WIDTH``: The maximum width and height of a point. +* ``MAX_POINT_WIDTH_AA``: The maximum width and height of a smoothed point. +* ``MAX_TEXTURE_ANISOTROPY``: The maximum level of anisotropy that can be + applied to anisotropically filtered textures. +* ``MAX_TEXTURE_LOD_BIAS``: The maximum :term:`LOD` bias that may be applied + to filtered textures. 
+* ``GUARD_BAND_LEFT``, ``GUARD_BAND_TOP``, ``GUARD_BAND_RIGHT``, + ``GUARD_BAND_BOTTOM``: XXX + +XXX Is there a better home for this? vvv + +If 0 is returned, the driver is not aware of multiple constant buffers, +supports binding of only one constant buffer, and does not support +two-dimensional CONST register file access in TGSI shaders. + +If a value greater than 0 is returned, the driver can have multiple +constant buffers bound to shader stages. The CONST register file can +be accessed with two-dimensional indices, like in the example below. + +DCL CONST[0][0..7] # declare first 8 vectors of constbuf 0 +DCL CONST[3][0] # declare first vector of constbuf 3 +MOV OUT[0], CONST[0][3] # copy vector 3 of constbuf 0 + +For backwards compatibility, one-dimensional access to CONST register +file is still supported. In that case, the constbuf index is assumed +to be 0. + +.. _pipe_buffer_usage: + +PIPE_BUFFER_USAGE +^^^^^^^^^^^^^^^^^ + +These flags control buffer creation. Buffers may only have one role, so +care should be taken to not allocate a buffer with the wrong usage. + +* ``PIXEL``: This is the flag to use for all textures. +* ``VERTEX``: A vertex buffer. +* ``INDEX``: An element buffer. +* ``CONSTANT``: A buffer of shader constants. + +Buffers are inevitably abstracting the pipe's underlying memory management, +so many of their usage flags can be used to direct the way the buffer is +handled. + +* ``CPU_READ``, ``CPU_WRITE``: Whether the user will map and, in the case of + the latter, write to, the buffer. The convenience flag ``CPU_READ_WRITE`` is + available to signify a read/write buffer. +* ``GPU_READ``, ``GPU_WRITE``: Whether the driver will internally need to + read from or write to the buffer. The latter will only happen if the buffer + is made into a render target. +* ``DISCARD``: When set on a map, the contents of the map will be discarded + beforehand. Cannot be used with ``CPU_READ``. 
+* ``DONTBLOCK``: When set on a map, the map will fail if the buffer cannot be + mapped immediately. +* ``UNSYNCHRONIZED``: When set on a map, any outstanding operations on the + buffer will be ignored. The interaction of any writes to the map and any + operations pending with the buffer are undefined. Cannot be used with + ``CPU_READ``. +* ``FLUSH_EXPLICIT``: When set on a map, written ranges of the map require + explicit flushes using :ref:`buffer_flush_mapped_range`. Requires + ``CPU_WRITE``. + +.. _pipe_texture_usage: + +PIPE_TEXTURE_USAGE +^^^^^^^^^^^^^^^^^^ + +These flags determine the possible roles a texture may be used for during its +lifetime. Texture usage flags are cumulative and may be combined to create a +texture that can be used as multiple things. + +* ``RENDER_TARGET``: A colorbuffer or pixelbuffer. +* ``DISPLAY_TARGET``: A sharable buffer that can be given to another process. +* ``PRIMARY``: A frontbuffer or scanout buffer. +* ``DEPTH_STENCIL``: A depthbuffer, stencilbuffer, or Z buffer. Gallium does + not explicitly provide for stencil-only buffers, so any stencilbuffer + validated here is implicitly also a depthbuffer. +* ``SAMPLER``: A texture that may be sampled from in a fragment or vertex + shader. +* ``DYNAMIC``: A texture that will be mapped frequently. + Methods ------- @@ -18,22 +172,96 @@ get_vendor Returns the screen vendor. +.. _get_param: + get_param ^^^^^^^^^ Get an integer/boolean screen parameter. +**param** is one of the :ref:`PIPE_CAP` names. + +.. _get_paramf: + get_paramf ^^^^^^^^^^ Get a floating-point screen parameter. +**param** is one of the :ref:`PIPE_CAP` names. + +context_create +^^^^^^^^^^^^^^ + +Create a pipe_context. + +**priv** is private data of the caller, which may be put to various +unspecified uses, typically to do with implementing swapbuffers +and/or front-buffer rendering. + is_format_supported ^^^^^^^^^^^^^^^^^^^ See if a format can be used in a specific manner. 
+**usage** is a bitmask of :ref:`PIPE_TEXTURE_USAGE` flags. + +Returns TRUE if all usages can be satisfied. + +.. note:: + + ``PIPE_TEXTURE_USAGE_DYNAMIC`` is not a valid usage. + +.. _texture_create: + texture_create ^^^^^^^^^^^^^^ -Given a template of texture setup, create a BO-backed texture. +Given a template of texture setup, create a buffer and texture. + +texture_blanket +^^^^^^^^^^^^^^^ + +Like :ref:`texture_create`, but use a supplied buffer instead of creating a +new one. + +texture_destroy +^^^^^^^^^^^^^^^ + +Destroy a texture. The buffer backing the texture is destroyed if it has no +more references. + +buffer_map +^^^^^^^^^^ + +Map a buffer into memory. + +**usage** is a bitmask of :ref:`PIPE_BUFFER_USAGE` flags. + +Returns a pointer to the map, or NULL if the mapping failed. + +buffer_map_range +^^^^^^^^^^^^^^^^ + +Map a range of a buffer into memory. + +The returned map is always relative to the beginning of the buffer, not the +beginning of the mapped range. + +.. _buffer_flush_mapped_range: + +buffer_flush_mapped_range +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Flush a range of mapped memory into a buffer. + +The buffer must have been mapped with ``PIPE_BUFFER_USAGE_FLUSH_EXPLICIT``. + +**usage** is a bitmask of :ref:`PIPE_BUFFER_USAGE` flags. + +buffer_unmap +^^^^^^^^^^^^ + +Unmap a buffer from memory. + +Any pointers into the map should be considered invalid and discarded. diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index ef068448e83..c292cd37d5c 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -6,6 +6,23 @@ for describing shaders. Since Gallium is inherently shaderful, shaders are an important part of the API. TGSI is the only intermediate representation used by all drivers. +Basics +------ + +All TGSI instructions, known as *opcodes*, operate on arbitrary-precision +floating-point four-component vectors. 
An opcode may have up to one +destination register, known as *dst*, and between zero and three source +registers, called *src0* through *src2*, or simply *src* if there is only +one. + +Some instructions, like :opcode:`I2F`, permit re-interpretation of vector +components as integers. Other instructions permit using registers as +two-component vectors with double precision; see :ref:`doubleopcodes`. + +When an instruction has a scalar result, the result is usually copied into +each of the components of *dst*. When this happens, the result is said to be +*replicated* to *dst*. :opcode:`RCP` is one such instruction. + Instruction Set --------------- @@ -13,7 +30,7 @@ From GL_NV_vertex_program ^^^^^^^^^^^^^^^^^^^^^^^^^ -ARL - Address Register Load +.. opcode:: ARL - Address Register Load .. math:: @@ -26,7 +43,7 @@ ARL - Address Register Load dst.w = \lfloor src.w\rfloor -MOV - Move +.. opcode:: MOV - Move .. math:: @@ -39,7 +56,7 @@ MOV - Move dst.w = src.w -LIT - Light Coefficients +.. opcode:: LIT - Light Coefficients .. math:: @@ -52,33 +69,25 @@ LIT - Light Coefficients dst.w = 1 -RCP - Reciprocal - -.. math:: +.. opcode:: RCP - Reciprocal - dst.x = \frac{1}{src.x} +This instruction replicates its result. - dst.y = \frac{1}{src.x} +.. math:: - dst.z = \frac{1}{src.x} + dst = \frac{1}{src.x} - dst.w = \frac{1}{src.x} +.. opcode:: RSQ - Reciprocal Square Root -RSQ - Reciprocal Square Root +This instruction replicates its result. .. math:: - dst.x = \frac{1}{\sqrt{|src.x|}} - - dst.y = \frac{1}{\sqrt{|src.x|}} - - dst.z = \frac{1}{\sqrt{|src.x|}} + dst = \frac{1}{\sqrt{|src.x|}} - dst.w = \frac{1}{\sqrt{|src.x|}} - -EXP - Approximate Exponential Base 2 +.. opcode:: EXP - Approximate Exponential Base 2 .. math:: @@ -91,7 +100,7 @@ EXP - Approximate Exponential Base 2 dst.w = 1 -LOG - Approximate Logarithm Base 2 +.. opcode:: LOG - Approximate Logarithm Base 2 .. math:: @@ -104,7 +113,7 @@ LOG - Approximate Logarithm Base 2 dst.w = 1 -MUL - Multiply +..
opcode:: MUL - Multiply .. math:: @@ -117,7 +126,7 @@ MUL - Multiply dst.w = src0.w \times src1.w -ADD - Add +.. opcode:: ADD - Add .. math:: @@ -130,33 +139,25 @@ ADD - Add dst.w = src0.w + src1.w -DP3 - 3-component Dot Product - -.. math:: +.. opcode:: DP3 - 3-component Dot Product - dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z +This instruction replicates its result. - dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z +.. math:: - dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z - dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z +.. opcode:: DP4 - 4-component Dot Product -DP4 - 4-component Dot Product +This instruction replicates its result. .. math:: - dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w - - dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w - - dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w - dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w - -DST - Distance Vector +.. opcode:: DST - Distance Vector .. math:: @@ -169,7 +170,7 @@ DST - Distance Vector dst.w = src1.w -MIN - Minimum +.. opcode:: MIN - Minimum .. math:: @@ -182,7 +183,7 @@ MIN - Minimum dst.w = min(src0.w, src1.w) -MAX - Maximum +.. opcode:: MAX - Maximum .. math:: @@ -195,7 +196,7 @@ MAX - Maximum dst.w = max(src0.w, src1.w) -SLT - Set On Less Than +.. opcode:: SLT - Set On Less Than .. math:: @@ -208,7 +209,7 @@ SLT - Set On Less Than dst.w = (src0.w < src1.w) ? 1 : 0 -SGE - Set On Greater Equal Than +.. opcode:: SGE - Set On Greater Equal Than .. 
math:: @@ -221,7 +222,7 @@ SGE - Set On Greater Equal Than dst.w = (src0.w >= src1.w) ? 1 : 0 -MAD - Multiply And Add +.. opcode:: MAD - Multiply And Add .. math:: @@ -234,7 +235,7 @@ MAD - Multiply And Add dst.w = src0.w \times src1.w + src2.w -SUB - Subtract +.. opcode:: SUB - Subtract .. math:: @@ -247,7 +248,7 @@ SUB - Subtract dst.w = src0.w - src1.w -LRP - Linear Interpolate +.. opcode:: LRP - Linear Interpolate .. math:: @@ -260,7 +261,7 @@ LRP - Linear Interpolate dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w -CND - Condition +.. opcode:: CND - Condition .. math:: @@ -273,7 +274,7 @@ CND - Condition dst.w = (src2.w > 0.5) ? src0.w : src1.w -DP2A - 2-component Dot Product And Add +.. opcode:: DP2A - 2-component Dot Product And Add .. math:: @@ -286,7 +287,7 @@ DP2A - 2-component Dot Product And Add dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x -FRAC - Fraction +.. opcode:: FRAC - Fraction .. math:: @@ -299,7 +300,7 @@ FRAC - Fraction dst.w = src.w - \lfloor src.w\rfloor -CLAMP - Clamp +.. opcode:: CLAMP - Clamp .. math:: @@ -312,9 +313,9 @@ CLAMP - Clamp dst.w = clamp(src0.w, src1.w, src2.w) -FLR - Floor +.. opcode:: FLR - Floor -This is identical to ARL. +This is identical to :opcode:`ARL`. .. math:: @@ -327,7 +328,7 @@ This is identical to ARL. dst.w = \lfloor src.w\rfloor -ROUND - Round +.. opcode:: ROUND - Round .. math:: @@ -340,45 +341,33 @@ ROUND - Round dst.w = round(src.w) -EX2 - Exponential Base 2 - -.. math:: +.. opcode:: EX2 - Exponential Base 2 - dst.x = 2^{src.x} +This instruction replicates its result. - dst.y = 2^{src.x} +.. math:: - dst.z = 2^{src.x} + dst = 2^{src.x} - dst.w = 2^{src.x} +.. opcode:: LG2 - Logarithm Base 2 -LG2 - Logarithm Base 2 +This instruction replicates its result. .. math:: - dst.x = \log_2{src.x} - - dst.y = \log_2{src.x} + dst = \log_2{src.x} - dst.z = \log_2{src.x} - dst.w = \log_2{src.x} +.. opcode:: POW - Power - -POW - Power +This instruction replicates its result. .. 
math:: - dst.x = src0.x^{src1.x} - - dst.y = src0.x^{src1.x} - - dst.z = src0.x^{src1.x} + dst = src0.x^{src1.x} - dst.w = src0.x^{src1.x} - -XPD - Cross Product +.. opcode:: XPD - Cross Product .. math:: @@ -391,7 +380,7 @@ XPD - Cross Product dst.w = 1 -ABS - Absolute +.. opcode:: ABS - Absolute .. math:: @@ -404,48 +393,36 @@ ABS - Absolute dst.w = |src.w| -RCC - Reciprocal Clamped +.. opcode:: RCC - Reciprocal Clamped + +This instruction replicates its result. XXX cleanup on aisle three .. math:: - dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) - - dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) - dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) - dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) +.. opcode:: DPH - Homogeneous Dot Product - -DPH - Homogeneous Dot Product +This instruction replicates its result. .. math:: - dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w - - dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w - - dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w - dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w +.. opcode:: COS - Cosine -COS - Cosine +This instruction replicates its result. .. math:: - dst.x = \cos{src.x} - - dst.y = \cos{src.x} - - dst.z = \cos{src.x} + dst = \cos{src.x} - dst.w = \cos{src.x} - -DDX - Derivative Relative To X +.. opcode:: DDX - Derivative Relative To X .. 
math:: @@ -458,7 +435,7 @@ DDX - Derivative Relative To X dst.w = partialx(src.w) -DDY - Derivative Relative To Y +.. opcode:: DDY - Derivative Relative To Y .. math:: @@ -471,32 +448,32 @@ DDY - Derivative Relative To Y dst.w = partialy(src.w) -KILP - Predicated Discard +.. opcode:: KILP - Predicated Discard discard -PK2H - Pack Two 16-bit Floats +.. opcode:: PK2H - Pack Two 16-bit Floats TBD -PK2US - Pack Two Unsigned 16-bit Scalars +.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars TBD -PK4B - Pack Four Signed 8-bit Scalars +.. opcode:: PK4B - Pack Four Signed 8-bit Scalars TBD -PK4UB - Pack Four Unsigned 8-bit Scalars +.. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars TBD -RFL - Reflection Vector +.. opcode:: RFL - Reflection Vector .. math:: @@ -508,145 +485,171 @@ RFL - Reflection Vector dst.w = 1 -Considered for removal. +.. note:: + + Considered for removal. -SEQ - Set On Equal +.. opcode:: SEQ - Set On Equal .. math:: dst.x = (src0.x == src1.x) ? 1 : 0 + dst.y = (src0.y == src1.y) ? 1 : 0 + dst.z = (src0.z == src1.z) ? 1 : 0 + dst.w = (src0.w == src1.w) ? 1 : 0 -SFL - Set On False +.. opcode:: SFL - Set On False + +This instruction replicates its result. .. math:: - dst.x = 0 - dst.y = 0 - dst.z = 0 - dst.w = 0 + dst = 0 + +.. note:: + + Considered for removal. -Considered for removal. -SGT - Set On Greater Than +.. opcode:: SGT - Set On Greater Than .. math:: dst.x = (src0.x > src1.x) ? 1 : 0 + dst.y = (src0.y > src1.y) ? 1 : 0 - dst.z = (src0.z > src1.z) ? 1 : 0 - dst.w = (src0.w > src1.w) ? 1 : 0 + dst.z = (src0.z > src1.z) ? 1 : 0 -SIN - Sine + dst.w = (src0.w > src1.w) ? 1 : 0 -.. math:: - dst.x = \sin{src.x} +.. opcode:: SIN - Sine - dst.y = \sin{src.x} +This instruction replicates its result. - dst.z = \sin{src.x} +.. math:: - dst.w = \sin{src.x} + dst = \sin{src.x} -SLE - Set On Less Equal Than +.. opcode:: SLE - Set On Less Equal Than .. math:: dst.x = (src0.x <= src1.x) ? 1 : 0 + dst.y = (src0.y <= src1.y) ? 
1 : 0 + dst.z = (src0.z <= src1.z) ? 1 : 0 + dst.w = (src0.w <= src1.w) ? 1 : 0 -SNE - Set On Not Equal +.. opcode:: SNE - Set On Not Equal .. math:: dst.x = (src0.x != src1.x) ? 1 : 0 + dst.y = (src0.y != src1.y) ? 1 : 0 + dst.z = (src0.z != src1.z) ? 1 : 0 + dst.w = (src0.w != src1.w) ? 1 : 0 -STR - Set On True +.. opcode:: STR - Set On True + +This instruction replicates its result. .. math:: - dst.x = 1 - dst.y = 1 - dst.z = 1 - dst.w = 1 + dst = 1 -TEX - Texture Lookup +.. opcode:: TEX - Texture Lookup TBD -TXD - Texture Lookup with Derivatives +.. opcode:: TXD - Texture Lookup with Derivatives TBD -TXP - Projective Texture Lookup +.. opcode:: TXP - Projective Texture Lookup TBD -UP2H - Unpack Two 16-Bit Floats +.. opcode:: UP2H - Unpack Two 16-Bit Floats TBD - Considered for removal. +.. note:: + + Considered for removal. -UP2US - Unpack Two Unsigned 16-Bit Scalars +.. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars TBD - Considered for removal. +.. note:: + + Considered for removal. -UP4B - Unpack Four Signed 8-Bit Values +.. opcode:: UP4B - Unpack Four Signed 8-Bit Values TBD - Considered for removal. +.. note:: -UP4UB - Unpack Four Unsigned 8-Bit Scalars + Considered for removal. + +.. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars TBD - Considered for removal. +.. note:: + + Considered for removal. -X2D - 2D Coordinate Transformation +.. opcode:: X2D - 2D Coordinate Transformation .. math:: dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w + dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w -Considered for removal. +.. note:: + + Considered for removal. From GL_NV_vertex_program2 ^^^^^^^^^^^^^^^^^^^^^^^^^^ -ARA - Address Register Add +.. opcode:: ARA - Address Register Add TBD - Considered for removal. +.. note:: -ARR - Address Register Load With Round + Considered for removal. + +.. 
opcode:: ARR - Address Register Load With Round .. math:: @@ -659,26 +662,28 @@ ARR - Address Register Load With Round dst.w = round(src.w) -BRA - Branch +.. opcode:: BRA - Branch pc = target - Considered for removal. +.. note:: + + Considered for removal. -CAL - Subroutine Call +.. opcode:: CAL - Subroutine Call push(pc) pc = target -RET - Subroutine Call Return +.. opcode:: RET - Subroutine Call Return pc = pop() Potential restrictions: * Only occurs at end of function. -SSG - Set Sign +.. opcode:: SSG - Set Sign .. math:: @@ -691,7 +696,7 @@ SSG - Set Sign dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 -CMP - Compare +.. opcode:: CMP - Compare .. math:: @@ -704,7 +709,7 @@ CMP - Compare dst.w = (src0.w < 0) ? src1.w : src2.w -KIL - Conditional Discard +.. opcode:: KIL - Conditional Discard .. math:: @@ -713,7 +718,7 @@ KIL - Conditional Discard endif -SCS - Sine Cosine +.. opcode:: SCS - Sine Cosine .. math:: @@ -726,12 +731,12 @@ SCS - Sine Cosine dst.y = 1 -TXB - Texture Lookup With Bias +.. opcode:: TXB - Texture Lookup With Bias TBD -NRM - 3-component Vector Normalise +.. opcode:: NRM - 3-component Vector Normalise .. math:: @@ -744,7 +749,7 @@ NRM - 3-component Vector Normalise dst.w = 1 -DIV - Divide +.. opcode:: DIV - Divide .. math:: @@ -757,35 +762,31 @@ DIV - Divide dst.w = \frac{src0.w}{src1.w} -DP2 - 2-component Dot Product +.. opcode:: DP2 - 2-component Dot Product -.. math:: +This instruction replicates its result. - dst.x = src0.x \times src1.x + src0.y \times src1.y - - dst.y = src0.x \times src1.x + src0.y \times src1.y - - dst.z = src0.x \times src1.x + src0.y \times src1.y +.. math:: - dst.w = src0.x \times src1.x + src0.y \times src1.y + dst = src0.x \times src1.x + src0.y \times src1.y -TXL - Texture Lookup With LOD +.. opcode:: TXL - Texture Lookup With LOD TBD -BRK - Break +.. opcode:: BRK - Break TBD -IF - If +.. opcode:: IF - If TBD -BGNFOR - Begin a For-Loop +.. 
opcode:: BGNFOR - Begin a For-Loop dst.x = floor(src.x) dst.y = floor(src.y) @@ -798,25 +799,31 @@ BGNFOR - Begin a For-Loop Note: The destination must be a loop register. The source must be a constant register. - Considered for cleanup / removal. +.. note:: + + Considered for cleanup. + +.. note:: + + Considered for removal. -REP - Repeat +.. opcode:: REP - Repeat TBD -ELSE - Else +.. opcode:: ELSE - Else TBD -ENDIF - End If +.. opcode:: ENDIF - End If TBD -ENDFOR - End a For-Loop +.. opcode:: ENDFOR - End a For-Loop dst.x = dst.x + dst.z dst.y = dst.y - 1.0 @@ -827,30 +834,48 @@ ENDFOR - End a For-Loop Note: The destination must be a loop register. - Considered for cleanup / removal. +.. note:: -ENDREP - End Repeat + Considered for cleanup. + +.. note:: + + Considered for removal. + +.. opcode:: ENDREP - End Repeat TBD -PUSHA - Push Address Register On Stack +.. opcode:: PUSHA - Push Address Register On Stack push(src.x) push(src.y) push(src.z) push(src.w) - Considered for cleanup / removal. +.. note:: + + Considered for cleanup. + +.. note:: + + Considered for removal. -POPA - Pop Address Register From Stack +.. opcode:: POPA - Pop Address Register From Stack dst.w = pop() dst.z = pop() dst.y = pop() dst.x = pop() - Considered for cleanup / removal. +.. note:: + + Considered for cleanup. + +.. note:: + + Considered for removal. From GL_NV_gpu_program4 @@ -858,7 +883,7 @@ From GL_NV_gpu_program4 Support for these opcodes indicated by a special pipe capability bit (TBD). -CEIL - Ceiling +.. opcode:: CEIL - Ceiling .. math:: @@ -871,7 +896,7 @@ CEIL - Ceiling dst.w = \lceil src.w\rceil -I2F - Integer To Float +.. opcode:: I2F - Integer To Float .. math:: @@ -884,7 +909,7 @@ I2F - Integer To Float dst.w = (float) src.w -NOT - Bitwise Not +.. opcode:: NOT - Bitwise Not .. math:: @@ -897,7 +922,7 @@ NOT - Bitwise Not dst.w = ~src.w -TRUNC - Truncate +.. opcode:: TRUNC - Truncate .. math:: @@ -910,7 +935,7 @@ TRUNC - Truncate dst.w = trunc(src.w) -SHL - Shift Left +.. 
opcode:: SHL - Shift Left .. math:: @@ -923,7 +948,7 @@ SHL - Shift Left dst.w = src0.w << src1.x -SHR - Shift Right +.. opcode:: SHR - Shift Right .. math:: @@ -936,7 +961,7 @@ SHR - Shift Right dst.w = src0.w >> src1.x -AND - Bitwise And +.. opcode:: AND - Bitwise And .. math:: @@ -949,7 +974,7 @@ AND - Bitwise And dst.w = src0.w & src1.w -OR - Bitwise Or +.. opcode:: OR - Bitwise Or .. math:: @@ -962,7 +987,7 @@ OR - Bitwise Or dst.w = src0.w | src1.w -MOD - Modulus +.. opcode:: MOD - Modulus .. math:: @@ -975,20 +1000,20 @@ MOD - Modulus dst.w = src0.w \bmod src1.w -XOR - Bitwise Xor +.. opcode:: XOR - Bitwise Xor .. math:: - dst.x = src0.x ^ src1.x + dst.x = src0.x \oplus src1.x - dst.y = src0.y ^ src1.y + dst.y = src0.y \oplus src1.y - dst.z = src0.z ^ src1.z + dst.z = src0.z \oplus src1.z - dst.w = src0.w ^ src1.w + dst.w = src0.w \oplus src1.w -SAD - Sum Of Absolute Differences +.. opcode:: SAD - Sum Of Absolute Differences .. math:: @@ -1001,17 +1026,17 @@ SAD - Sum Of Absolute Differences dst.w = |src0.w - src1.w| + src2.w -TXF - Texel Fetch +.. opcode:: TXF - Texel Fetch TBD -TXQ - Texture Size Query +.. opcode:: TXQ - Texture Size Query TBD -CONT - Continue +.. opcode:: CONT - Continue TBD @@ -1020,12 +1045,12 @@ From GL_NV_geometry_program4 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -EMIT - Emit +.. opcode:: EMIT - Emit TBD -ENDPRIM - End Primitive +.. opcode:: ENDPRIM - End Primitive TBD @@ -1034,62 +1059,171 @@ From GLSL ^^^^^^^^^^ -BGNLOOP - Begin a Loop +.. opcode:: BGNLOOP - Begin a Loop TBD -BGNSUB - Begin Subroutine +.. opcode:: BGNSUB - Begin Subroutine TBD -ENDLOOP - End a Loop +.. opcode:: ENDLOOP - End a Loop TBD -ENDSUB - End Subroutine +.. opcode:: ENDSUB - End Subroutine TBD -NOP - No Operation +.. opcode:: NOP - No Operation Do nothing. -NRM4 - 4-component Vector Normalise - -.. math:: - - dst.x = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} +.. 
opcode:: NRM4 - 4-component Vector Normalise - dst.y = \frac{src.y}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} +This instruction replicates its result. - dst.z = \frac{src.z}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} +.. math:: - dst.w = \frac{src.w}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + dst = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} ps_2_x ^^^^^^^^^^^^ -CALLNZ - Subroutine Call If Not Zero +.. opcode:: CALLNZ - Subroutine Call If Not Zero TBD -IFC - If +.. opcode:: IFC - If TBD -BREAKC - Break Conditional +.. opcode:: BREAKC - Break Conditional TBD +.. _doubleopcodes: + +Double Opcodes +^^^^^^^^^^^^^^^ + +.. opcode:: DADD - Add Double + +.. math:: + + dst.xy = src0.xy + src1.xy + + dst.zw = src0.zw + src1.zw + + +.. opcode:: DDIV - Divide Double + +.. math:: + + dst.xy = src0.xy / src1.xy + + dst.zw = src0.zw / src1.zw + +.. opcode:: DSEQ - Set Double on Equal + +.. math:: + + dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F + + dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F + +.. opcode:: DSLT - Set Double on Less than + +.. math:: + + dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F + + dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F + +.. opcode:: DFRAC - Double Fraction + +.. math:: + + dst.xy = src.xy - \lfloor src.xy\rfloor + + dst.zw = src.zw - \lfloor src.zw\rfloor + + +.. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components + +.. math:: + + dst0.xy = frexp(src.xy, dst1.xy) + + dst0.zw = frexp(src.zw, dst1.zw) + +.. opcode:: DLDEXP - Multiply Double Number by Integral Power of 2 + +.. math:: + + dst.xy = ldexp(src0.xy, src1.xy) + + dst.zw = ldexp(src0.zw, src1.zw) + +.. opcode:: DMIN - Minimum Double + +.. math:: + + dst.xy = min(src0.xy, src1.xy) + + dst.zw = min(src0.zw, src1.zw) + +.. opcode:: DMAX - Maximum Double + +..
math:: + + dst.xy = max(src0.xy, src1.xy) + + dst.zw = max(src0.zw, src1.zw) + +.. opcode:: DMUL - Multiply Double + +.. math:: + + dst.xy = src0.xy \times src1.xy + + dst.zw = src0.zw \times src1.zw + + +.. opcode:: DMAD - Multiply And Add Doubles + +.. math:: + + dst.xy = src0.xy \times src1.xy + src2.xy + + dst.zw = src0.zw \times src1.zw + src2.zw + + +.. opcode:: DRCP - Reciprocal Double + +.. math:: + + dst.xy = \frac{1}{src.xy} + + dst.zw = \frac{1}{src.zw} + +.. opcode:: DSQRT - Square root double + +.. math:: + + dst.xy = \sqrt{src.xy} + + dst.zw = \sqrt{src.zw} + Explanation of symbols used ------------------------------ @@ -1137,25 +1271,41 @@ Keywords discard Discard fragment. - dst First destination register. + pc Program counter. - dst0 First destination register. + target Label of target instruction. - pc Program counter. - src First source register. +Other tokens +--------------- - src0 First source register. - src1 Second source register. +Declaration +^^^^^^^^^^^ - src2 Third source register. - target Label of target instruction. +Declares a register that will be referenced as an operand in Instruction +tokens. +File field contains register file that is being declared and is one +of TGSI_FILE. -Other tokens ---------------- +UsageMask field specifies which of the register components can be accessed +and is one of TGSI_WRITEMASK. + +Interpolate field is only valid for fragment shader INPUT register files. +It specifies the way input is being interpolated by the rasteriser and is one +of TGSI_INTERPOLATE. + +If Dimension flag is set to 1, a Declaration Dimension token follows. + +If Semantic flag is set to 1, a Declaration Semantic token follows. + +CylindricalWrap bitfield is only valid for fragment shader INPUT register +files. It specifies which register components should be subject to cylindrical +wrapping when interpolating by the rasteriser.
If TGSI_CYLINDRICAL_WRAP_X +is set to 1, the X component should be interpolated according to cylindrical +wrapping rules. Declaration Semantic @@ -1187,9 +1337,8 @@ are the Cartesian coordinates, and ``w`` is the homogenous coordinate and used for the perspective divide, if enabled. As a vertex shader output, position should be scaled to the viewport. When -used in fragment shaders, position will --- - -XXX --- wait a minute. Should position be in [0,1] for x and y? +used in fragment shaders, position will be in window coordinates. The convention +used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties. XXX additionally, is there a way to configure the perspective divide? it's accelerated on most chipsets AFAIK... @@ -1266,3 +1415,85 @@ TGSI_SEMANTIC_EDGEFLAG """""""""""""""""""""" XXX no clue + + +Properties +^^^^^^^^^^^^^^^^^^^^^^^^ + + + Properties are general directives that apply to the whole TGSI program. + +FS_COORD_ORIGIN +""""""""""""""" + +Specifies the fragment shader TGSI_SEMANTIC_POSITION coordinate origin. +The default value is UPPER_LEFT. + +If UPPER_LEFT, the position will be (0,0) at the upper left corner and +increase downward and rightward. +If LOWER_LEFT, the position will be (0,0) at the lower left corner and +increase upward and rightward. + +OpenGL defaults to LOWER_LEFT, and is configurable with the +GL_ARB_fragment_coord_conventions extension. + +DirectX 9/10 use UPPER_LEFT. + +FS_COORD_PIXEL_CENTER +""""""""""""""""""""" + +Specifies the fragment shader TGSI_SEMANTIC_POSITION pixel center convention. +The default value is HALF_INTEGER. + +If HALF_INTEGER, the fractionary part of the position will be 0.5 +If INTEGER, the fractionary part of the position will be 0.0 + +Note that this does not affect the set of fragments generated by +rasterization, which is instead controlled by gl_rasterization_rules in the +rasterizer. 
+ +OpenGL defaults to HALF_INTEGER, and is configurable with the +GL_ARB_fragment_coord_conventions extension. + +DirectX 9 uses INTEGER. +DirectX 10 uses HALF_INTEGER. + + + +Texture Sampling and Texture Formats +------------------------------------ + +This table shows how texture image components are returned as (x,y,z,w) tuples +by TGSI texture instructions, such as :opcode:`TEX`, :opcode:`TXD`, and +:opcode:`TXP`. For reference, OpenGL and Direct3D conventions are shown as +well. + ++--------------------+--------------+--------------------+--------------+ +| Texture Components | Gallium | OpenGL | Direct3D 9 | ++====================+==============+====================+==============+ +| R | XXX TBD | (r, 0, 0, 1) | (r, 1, 1, 1) | ++--------------------+--------------+--------------------+--------------+ +| RG | XXX TBD | (r, g, 0, 1) | (r, g, 1, 1) | ++--------------------+--------------+--------------------+--------------+ +| RGB | (r, g, b, 1) | (r, g, b, 1) | (r, g, b, 1) | ++--------------------+--------------+--------------------+--------------+ +| RGBA | (r, g, b, a) | (r, g, b, a) | (r, g, b, a) | ++--------------------+--------------+--------------------+--------------+ +| A | (0, 0, 0, a) | (0, 0, 0, a) | (0, 0, 0, a) | ++--------------------+--------------+--------------------+--------------+ +| L | (l, l, l, 1) | (l, l, l, 1) | (l, l, l, 1) | ++--------------------+--------------+--------------------+--------------+ +| LA | (l, l, l, a) | (l, l, l, a) | (l, l, l, a) | ++--------------------+--------------+--------------------+--------------+ +| I | (i, i, i, i) | (i, i, i, i) | N/A | ++--------------------+--------------+--------------------+--------------+ +| UV | XXX TBD | (0, 0, 0, 1) | (u, v, 1, 1) | +| | | [#envmap-bumpmap]_ | | ++--------------------+--------------+--------------------+--------------+ +| Z | XXX TBD | (z, z, z, 1) | (0, z, 0, 1) | +| | | [#depth-tex-mode]_ | | 
++--------------------+--------------+--------------------+--------------+ + +.. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt +.. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z) + or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE. diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d5f5c7bbba8..7f2b33c2dcc 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -36,7 +36,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" - +#include <stdio.h> /** The standard assert macro doesn't seem to work reliably */ #define ASSERT(x) \ @@ -49,7 +49,6 @@ } - #define JOIN(x, y) JOIN_AGAIN(x, y) #define JOIN_AGAIN(x, y) x ## y @@ -358,6 +357,7 @@ struct cell_spu_function_info /** This is the object passed to spe_create_thread() */ +PIPE_ALIGN_TYPE(16, struct cell_init_info { unsigned id; @@ -370,7 +370,7 @@ struct cell_init_info uint *buffer_status; /**< points at cell_context->buffer_status */ struct cell_spu_function_info *spu_functions; -} ALIGN16_ATTRIB; +}); #endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 3a3f968a492..246fe210542 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -33,7 +33,7 @@ #include <stdio.h> #include <assert.h> #include <stdint.h> -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_pack_color.h" #include "cell/common.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index ebb7a7acc44..5bff9869fd0 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -36,7 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" #include "util/u_memory.h" -#include 
"pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_screen.h" #include "draw/draw_context.h" @@ -124,7 +124,7 @@ cell_is_buffer_referenced( struct pipe_context *pipe, struct pipe_context * cell_create_context(struct pipe_screen *screen, - struct cell_winsys *cws) + void *priv ) { struct cell_context *cell; uint i; @@ -136,9 +136,10 @@ cell_create_context(struct pipe_screen *screen, memset(cell, 0, sizeof(*cell)); - cell->winsys = cws; + cell->winsys = NULL; /* XXX: fixme - get this from screen? */ cell->pipe.winsys = screen->winsys; cell->pipe.screen = screen; + cell->pipe.priv = priv; cell->pipe.destroy = cell_destroy_context; cell->pipe.clear = cell_clear; diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 5c3188e7f9d..905cd5db390 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -89,7 +89,7 @@ struct cell_buffer_node; */ struct cell_buffer_list { - struct cell_fence fence ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_fence fence; struct cell_buffer_node *head; }; @@ -115,7 +115,7 @@ struct cell_context struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[2]; + struct pipe_buffer *constants[2]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; @@ -150,18 +150,18 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; - struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; - ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; int cur_batch; /**< which buffer is being filled w/ commands 
*/ /** [4] to ensure 16-byte alignment for each status word */ - uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; /** Associated with each command/batch buffer is a list of pipe_buffers @@ -188,8 +188,9 @@ cell_context(struct pipe_context *pipe) } -extern struct pipe_context * -cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws); +struct pipe_context * +cell_create_context(struct pipe_screen *screen, + void *priv ); extern void cell_vertex_shader_queue_flush(struct draw_context *draw); diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 3fa8b975d39..bffd0fac6fe 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -33,8 +33,8 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_simple_screen.h" +#include "util/u_inlines.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -51,17 +51,17 @@ cell_map_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) { - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + if (sp->constants[i] && sp->constants[i]->size) { + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i], PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, sp->mapped_constants[i], - sp->constants[i].buffer->size); + sp->constants[i]->size); } } - draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, 0, sp->mapped_constants[PIPE_SHADER_VERTEX], - sp->constants[PIPE_SHADER_VERTEX].buffer->size); + sp->constants[PIPE_SHADER_VERTEX]->size); } static void @@ -70,8 +70,8 @@ 
cell_unmap_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->buffer_unmap(ws, sp->constants[i].buffer); + if (sp->constants[i] && sp->constants[i]->size) + ws->buffer_unmap(ws, sp->constants[i]); sp->mapped_constants[i] = NULL; } } diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c index 13125a9fa30..e10071529a8 100644 --- a/src/gallium/drivers/cell/ppu/cell_fence.c +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -27,7 +27,7 @@ #include <unistd.h> #include "util/u_memory.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "cell_context.h" #include "cell_batch.h" #include "cell_fence.h" diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 66d4b3b6a31..0dab34075da 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -408,7 +408,7 @@ gen_blend(const struct pipe_blend_state *blend, int one_reg = -1; int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1; - ASSERT(blend->blend_enable); + ASSERT(blend->rt[0].blend_enable); /* packed RGBA -> float colors */ unpack_colors(f, color_format, fbRGBA_reg, @@ -420,7 +420,7 @@ gen_blend(const struct pipe_blend_state *blend, * because in some cases (like PIPE_BLENDFACTOR_ONE and * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. */ - switch (blend->rgb_src_factor) { + switch (blend->rt[0].rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: /* factors = (1,1,1), so term = (R,G,B) */ spe_move(f, term1R_reg, fragR_reg); @@ -574,7 +574,7 @@ gen_blend(const struct pipe_blend_state *blend, * the full term A*factor, not just the factor itself, because * in many cases we can avoid doing unnecessary multiplies. 
*/ - switch (blend->alpha_src_factor) { + switch (blend->rt[0].alpha_src_factor) { case PIPE_BLENDFACTOR_ZERO: /* factor = 0, so term = 0 */ spe_load_float(f, term1A_reg, 0.0f); @@ -648,7 +648,7 @@ gen_blend(const struct pipe_blend_state *blend, * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because * in many cases we can avoid doing unnecessary multiplies. */ - switch (blend->rgb_dst_factor) { + switch (blend->rt[0].rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ spe_move(f, term2R_reg, fbR_reg); @@ -786,7 +786,7 @@ gen_blend(const struct pipe_blend_state *blend, * the full term Afb*factor, not just the factor itself, because * in many cases we can avoid doing unnecessary multiplies. */ - switch (blend->alpha_dst_factor) { + switch (blend->rt[0].alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: /* factor = 1, so term = Afb */ spe_move(f, term2A_reg, fbA_reg); @@ -858,7 +858,7 @@ gen_blend(const struct pipe_blend_state *blend, /* * Combine Src/Dest RGB terms as per the blend equation. 
*/ - switch (blend->rgb_func) { + switch (blend->rt[0].rgb_func) { case PIPE_BLEND_ADD: spe_fa(f, fragR_reg, term1R_reg, term2R_reg); spe_fa(f, fragG_reg, term1G_reg, term2G_reg); @@ -891,7 +891,7 @@ gen_blend(const struct pipe_blend_state *blend, /* * Combine Src/Dest A term */ - switch (blend->alpha_func) { + switch (blend->rt[0].alpha_func) { case PIPE_BLEND_ADD: spe_fa(f, fragA_reg, term1A_reg, term2A_reg); break; @@ -2118,7 +2118,7 @@ cell_gen_fragment_function(struct cell_context *cell, spe_comment(f, 0, "Fetch quad colors from tile"); spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); - if (blend->blend_enable) { + if (blend->rt[0].blend_enable) { spe_comment(f, 0, "Perform blending"); gen_blend(blend, blend_color, f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); @@ -2143,9 +2143,9 @@ cell_gen_fragment_function(struct cell_context *cell, gen_logicop(blend, f, rgba_reg, fbRGBA_reg); } - if (blend->colormask != PIPE_MASK_RGBA) { + if (blend->rt[0].colormask != PIPE_MASK_RGBA) { spe_comment(f, 0, "Compute color mask"); - gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg); + gen_colormask(f, blend->rt[0].colormask, color_format, rgba_reg, fbRGBA_reg); } /* Mix fragment colors with framebuffer colors using the quad/pixel mask: diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index c18a5d0635e..3259c58687c 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -31,7 +31,7 @@ */ #include "util/u_memory.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "draw/draw_context.h" #include "cell_context.h" #include "cell_flush.h" diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index d185c6b8497..7681e3411e8 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -28,7 +28,7 @@ 
#include "util/u_memory.h" #include "util/u_simple_screen.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -86,6 +86,12 @@ cell_get_param(struct pipe_screen *screen, int param) return 0; /* XXX to do */ case PIPE_CAP_TGSI_CONT_SUPPORTED: return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; default: return 0; } @@ -168,6 +174,7 @@ cell_create_screen(struct pipe_winsys *winsys) screen->get_param = cell_get_param; screen->get_paramf = cell_get_paramf; screen->is_format_supported = cell_is_format_supported; + screen->context_create = cell_create_context; cell_init_screen_texture_funcs(screen); u_simple_screen_init(screen); diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index b193170f9ce..7adedcde57c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -50,7 +50,7 @@ extern void -cell_update_derived( struct cell_context *softpipe ); +cell_update_derived( struct cell_context *cell ); extern void diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 5b87286d4c5..282f05ba08b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_math.h" #include "cell_context.h" @@ -240,12 +240,12 @@ cell_emit_state(struct cell_context *cell) if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { const uint shader = PIPE_SHADER_FRAGMENT; - const uint num_const = cell->constants[shader].buffer->size / 
sizeof(float); + const uint num_const = cell->constants[shader]->size / sizeof(float); uint i, j; float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); uint32_t *ibuf = (uint32_t *) buf; const float *constants = pipe_buffer_map(cell->pipe.screen, - cell->constants[shader].buffer, + cell->constants[shader], PIPE_BUFFER_USAGE_CPU_READ); ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; ibuf[4] = num_const; @@ -253,7 +253,7 @@ cell_emit_state(struct cell_context *cell) for (i = 0; i < num_const; i++) { buf[j++] = constants[i]; } - pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer); + pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader]); } if (cell->dirty & (CELL_NEW_FRAMEBUFFER | diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index d97c22b2efe..21af7ed1c3f 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -999,23 +999,23 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) /* Does the selected blend mode make use of the source / destination * color (RGB) blend factors? */ - boolean need_color_factor = b->blend_enable - && (b->rgb_func != PIPE_BLEND_MIN) - && (b->rgb_func != PIPE_BLEND_MAX); + boolean need_color_factor = b->rt[0].blend_enable + && (b->rt[0].rgb_func != PIPE_BLEND_MIN) + && (b->rt[0].rgb_func != PIPE_BLEND_MAX); /* Does the selected blend mode make use of the source / destination * alpha blend factors? 
*/ - boolean need_alpha_factor = b->blend_enable - && (b->alpha_func != PIPE_BLEND_MIN) - && (b->alpha_func != PIPE_BLEND_MAX); + boolean need_alpha_factor = b->rt[0].blend_enable + && (b->rt[0].alpha_func != PIPE_BLEND_MIN) + && (b->rt[0].alpha_func != PIPE_BLEND_MAX); - if (b->blend_enable) { - sF[0] = b->rgb_src_factor; + if (b->rt[0].blend_enable) { + sF[0] = b->rt[0].rgb_src_factor; sF[1] = sF[0]; sF[2] = sF[0]; - switch (b->alpha_src_factor & 0x0f) { + switch (b->rt[0].alpha_src_factor & 0x0f) { case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: sF[3] = PIPE_BLENDFACTOR_ONE; break; @@ -1023,30 +1023,30 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) case PIPE_BLENDFACTOR_DST_COLOR: case PIPE_BLENDFACTOR_CONST_COLOR: case PIPE_BLENDFACTOR_SRC1_COLOR: - sF[3] = b->alpha_src_factor + 1; + sF[3] = b->rt[0].alpha_src_factor + 1; break; default: - sF[3] = b->alpha_src_factor; + sF[3] = b->rt[0].alpha_src_factor; } - dF[0] = b->rgb_dst_factor; + dF[0] = b->rt[0].rgb_dst_factor; dF[1] = dF[0]; dF[2] = dF[0]; - switch (b->alpha_dst_factor & 0x0f) { + switch (b->rt[0].alpha_dst_factor & 0x0f) { case PIPE_BLENDFACTOR_SRC_COLOR: case PIPE_BLENDFACTOR_DST_COLOR: case PIPE_BLENDFACTOR_CONST_COLOR: case PIPE_BLENDFACTOR_SRC1_COLOR: - dF[3] = b->alpha_dst_factor + 1; + dF[3] = b->rt[0].alpha_dst_factor + 1; break; default: - dF[3] = b->alpha_dst_factor; + dF[3] = b->rt[0].alpha_dst_factor; } - func[0] = b->rgb_func; + func[0] = b->rt[0].rgb_func; func[1] = func[0]; func[2] = func[0]; - func[3] = b->alpha_func; + func[3] = b->rt[0].alpha_func; } else { sF[0] = PIPE_BLENDFACTOR_ONE; sF[1] = PIPE_BLENDFACTOR_ONE; @@ -1067,7 +1067,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) /* If alpha writing is enabled and the alpha blend mode requires use of * the alpha factor, calculate the alpha factor. 
*/ - if (((b->colormask & 8) != 0) && need_alpha_factor) { + if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) { src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], frag[3], pixel[3]); @@ -1091,8 +1091,8 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) src_factor[2] = dst_factor[3]; } else if (need_color_factor) { emit_color_factor_calculation(f, - b->rgb_src_factor, - b->colormask, + b->rt[0].rgb_src_factor, + b->rt[0].colormask, frag, pixel, const_color, src_factor); } @@ -1111,15 +1111,15 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) dst_factor[2] = src_factor[2]; } else if (need_color_factor) { emit_color_factor_calculation(f, - b->rgb_dst_factor, - b->colormask, + b->rt[0].rgb_dst_factor, + b->rt[0].colormask, frag, pixel, const_color, dst_factor); } for (i = 0; i < 4; ++i) { - if ((b->colormask & (1U << i)) != 0) { + if ((b->rt[0].colormask & (1U << i)) != 0) { emit_blend_calculation(f, func[i], sF[i], dF[i], frag[i], src_factor[i], @@ -1216,7 +1216,7 @@ cell_generate_logic_op(struct spe_function *f, /* Short-circuit the noop and invert cases. 
*/ - if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) { + if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) { spe_bi(f, 0, 0, 0); return; } else if (logic_op == PIPE_LOGICOP_INVERT) { diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 6568c784fec..9b2f86fdfba 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -27,8 +27,8 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_inlines.h" +#include "util/u_simple_screen.h" #include "draw/draw_context.h" #include "tgsi/tgsi_parse.h" @@ -183,7 +183,7 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct cell_context *cell = cell_context(pipe); @@ -193,7 +193,7 @@ cell_set_constant_buffer(struct pipe_context *pipe, draw_flush(cell->draw); /* note: reference counting */ - pipe_buffer_reference(&cell->constants[shader].buffer, buf->buffer); + pipe_buffer_reference(&cell->constants[shader], buf); if (shader == PIPE_SHADER_VERTEX) cell->dirty |= CELL_NEW_VS_CONSTANTS; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 998944f77a3..fad290dfa0e 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -33,8 +33,8 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_inlines.h" +#include "util/u_simple_screen.h" #include "util/u_format.h" #include "util/u_math.h" diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 
403cf6d50fc..cf8cd411598 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "util/u_math.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.h b/src/gallium/drivers/cell/ppu/cell_winsys.h index ae2af5696b5..e227e065ff3 100644 --- a/src/gallium/drivers/cell/ppu/cell_winsys.h +++ b/src/gallium/drivers/cell/ppu/cell_winsys.h @@ -38,13 +38,10 @@ */ struct cell_winsys { - uint preferredFormat; + uint dummy; }; -extern struct cell_winsys * -cell_get_winsys(uint format); - #endif diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 12b855a3db2..55bd85bde2b 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,8 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; @@ -543,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d86d8e09a51..d2166a49016 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; 
i++) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d ALIGN16_ATTRIB; + } d; unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i ALIGN16_ATTRIB; + } i; unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 86056799405..0ca92af248d 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ + PIPE_ALIGN_VAR(16) struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] - ALIGN16_ATTRIB; + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index ff3d609d258..98919c43ffc 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - struct cell_spu_function_info funcs ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 33767e7c51d..a9d72f84d56 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -93,50 +93,64 @@ typedef vector unsigned 
int (*spu_fragment_program_func)(vector float *inputs, vector float *constants); +PIPE_ALIGN_TYPE(16, struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ enum pipe_format color_format; enum pipe_format depth_format; - uint width, height; /**< size in pixels */ - uint width_tiles, height_tiles; /**< width and height in tiles */ + uint width; /**< width in pixels */ + uint height; /**< height in pixels */ + uint width_tiles; /**< width in tiles */ + uint height_tiles; /**< width in tiles */ uint color_clear_value; uint depth_clear_value; uint zsize; /**< 0, 2 or 4 bytes per Z */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -} ALIGN16_ATTRIB; +}); /** per-texture level info */ +PIPE_ALIGN_TYPE(16, struct spu_texture_level { void *start; - ushort width, height, depth; + ushort width; + ushort height; + ushort depth; ushort tiles_per_row; uint bytes_per_image; /** texcoord scale factors */ - vector float scale_s, scale_t, scale_r; + vector float scale_s; + vector float scale_t; + vector float scale_r; /** texcoord masks (if REPEAT then size-1, else ~0) */ - vector signed int mask_s, mask_t, mask_r; + vector signed int mask_s; + vector signed int mask_t; + vector signed int mask_r; /** texcoord clamp limits */ - vector signed int max_s, max_t, max_r; -} ALIGN16_ATTRIB; + vector signed int max_s; + vector signed int max_t; + vector signed int max_r; +}); +PIPE_ALIGN_TYPE(16, struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; uint target; /**< PIPE_TEXTURE_x */ -} ALIGN16_ATTRIB; +}); /** * All SPU global/context state will be in a singleton object of this type: */ +PIPE_ALIGN_TYPE(16, struct spu_global { /** One-time init/constant info */ @@ -155,18 +169,19 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) 
tile_t ctile; + PIPE_ALIGN_VAR(16) tile_t ztile; /** Read depth/stencil tiles? */ boolean read_depth_stencil; /** Current tiles' status */ - ubyte cur_ctile_status, cur_ztile_status; + ubyte cur_ctile_status; + ubyte cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; + PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -175,7 +190,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; /** Current fragment ops function */ spu_fragment_program_func fragment_program; @@ -187,7 +202,7 @@ struct spu_global /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; -} ALIGN16_ATTRIB; +}); extern struct spu_global spu; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index eba9f95cf1f..53283740805 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -207,9 +207,9 @@ spu_fallback_fragment_ops(uint x, uint y, * If we'll need the current framebuffer/tile colors for blending * or logicop or colormask, fetch them now. 
*/ - if (spu.blend.blend_enable || + if (spu.blend.rt[0].blend_enable || spu.blend.logicop_enable || - spu.blend.colormask != 0xf) { + spu.blend.rt[0].colormask != 0xf) { #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ fbc0 = colorTile->ui[y][x*2+0]; @@ -228,7 +228,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Do blending */ - if (spu.blend.blend_enable) { + if (spu.blend.rt[0].blend_enable) { /* blending terms, misc regs */ vector float term1r, term1g, term1b, term1a; vector float term2r, term2g, term2b, term2a; @@ -261,7 +261,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Compute Src RGB terms (fragment color * factor) */ - switch (spu.blend.rgb_src_factor) { + switch (spu.blend.rt[0].rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: term1r = fragR; term1g = fragG; @@ -310,7 +310,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Compute Src Alpha term (fragment alpha * factor) */ - switch (spu.blend.alpha_src_factor) { + switch (spu.blend.rt[0].alpha_src_factor) { case PIPE_BLENDFACTOR_ONE: term1a = fragA; break; @@ -338,7 +338,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Compute Dest RGB terms (framebuffer color * factor) */ - switch (spu.blend.rgb_dst_factor) { + switch (spu.blend.rt[0].rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: term2r = fbRGBA[0]; term2g = fbRGBA[1]; @@ -394,7 +394,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Compute Dest Alpha term (framebuffer alpha * factor) */ - switch (spu.blend.alpha_dst_factor) { + switch (spu.blend.rt[0].alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: term2a = fbRGBA[3]; break; @@ -427,7 +427,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Combine Src/Dest RGB terms */ - switch (spu.blend.rgb_func) { + switch (spu.blend.rt[0].rgb_func) { case PIPE_BLEND_ADD: fragR = spu_add(term1r, term2r); fragG = spu_add(term1g, term2g); @@ -460,7 +460,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Combine Src/Dest A term */ - switch (spu.blend.alpha_func) { + switch 
(spu.blend.rt[0].alpha_func) { case PIPE_BLEND_ADD: fragA = spu_add(term1a, term2a); break; @@ -527,29 +527,29 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Do color masking */ - if (spu.blend.colormask != 0xf) { + if (spu.blend.rt[0].colormask != 0xf) { uint cmask = 0x0; /* each byte corresponds to a color channel */ /* Form bitmask depending on color buffer format and colormask bits */ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - if (spu.blend.colormask & PIPE_MASK_R) + if (spu.blend.rt[0].colormask & PIPE_MASK_R) cmask |= 0x00ff0000; /* red */ - if (spu.blend.colormask & PIPE_MASK_G) + if (spu.blend.rt[0].colormask & PIPE_MASK_G) cmask |= 0x0000ff00; /* green */ - if (spu.blend.colormask & PIPE_MASK_B) + if (spu.blend.rt[0].colormask & PIPE_MASK_B) cmask |= 0x000000ff; /* blue */ - if (spu.blend.colormask & PIPE_MASK_A) + if (spu.blend.rt[0].colormask & PIPE_MASK_A) cmask |= 0xff000000; /* alpha */ break; case PIPE_FORMAT_B8G8R8A8_UNORM: - if (spu.blend.colormask & PIPE_MASK_R) + if (spu.blend.rt[0].colormask & PIPE_MASK_R) cmask |= 0x0000ff00; /* red */ - if (spu.blend.colormask & PIPE_MASK_G) + if (spu.blend.rt[0].colormask & PIPE_MASK_G) cmask |= 0x00ff0000; /* green */ - if (spu.blend.colormask & PIPE_MASK_B) + if (spu.blend.rt[0].colormask & PIPE_MASK_B) cmask |= 0xff000000; /* blue */ - if (spu.blend.colormask & PIPE_MASK_A) + if (spu.blend.rt[0].colormask & PIPE_MASK_A) cmask |= 0x000000ff; /* alpha */ break; default: diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5ffb7073abf..14987e3c3a2 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; const uint vertex_size = render->vertex_size; 
/* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 03375d84a57..087963960df 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) static const qword +fetch_shuffle_data[5] = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword in[2 * 4]; /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index fbe5b34d397..3e9804bf8ee 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; + PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw, ASSERT_ALIGN16(draw->constants); machine->Consts = (float (*)[4]) draw->constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = inputs; + 
machine->Outputs = outputs; spu_vertex_fetch( draw, machine, elts, count ); @@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; + PIPE_ALIGN_VAR(16) unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + + MAX_VERTEX_SIZE]; struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) unsigned char +immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); void diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 46e4338d98a..2ccc5d3e605 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -27,7 +27,7 @@ #include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "util/u_memory.h" #include "pipe/p_context.h" diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 149393712a3..191a44c3dfc 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -125,7 +125,7 @@ failover_context( struct pipe_context *pipe ) void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf); + struct pipe_buffer *buf); #endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 3f5f5560323..c189d1d82cd 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -28,6 +28,8 @@ /* Authors: Keith Whitwell <[email protected]> */ +#include "util/u_inlines.h" + #include "fo_context.h" @@ -495,7 
+497,7 @@ failover_set_vertex_elements(struct pipe_context *pipe, void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct failover_context *failover = failover_context(pipe); diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c index 669964770d4..0f76a59e93a 100644 --- a/src/gallium/drivers/i915/i915_buffer.c +++ b/src/gallium/drivers/i915/i915_buffer.c @@ -23,6 +23,7 @@ * **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_memory.h" #include "i915_screen.h" #include "i915_buffer.h" diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index 90530f2826f..0d0859f8f33 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -32,7 +32,6 @@ #include "util/u_clear.h" #include "i915_context.h" -#include "i915_state.h" /** diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 89feeade756..3d45a22b7e7 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -29,13 +29,10 @@ #include "i915_state.h" #include "i915_screen.h" #include "i915_batch.h" -#include "i915_texture.h" -#include "i915_reg.h" #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "pipe/p_screen.h" @@ -84,7 +81,7 @@ i915_draw_range_elements(struct pipe_context *pipe, } - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, i915->current.constants[PIPE_SHADER_VERTEX], (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 4 * sizeof(float))); @@ -186,7 +183,7 @@ static void i915_destroy(struct pipe_context *pipe) } 
struct pipe_context * -i915_create_context(struct pipe_screen *screen) +i915_create_context(struct pipe_screen *screen, void *priv) { struct i915_context *i915; @@ -197,6 +194,7 @@ i915_create_context(struct pipe_screen *screen) i915->iws = i915_screen(screen)->iws; i915->base.winsys = NULL; i915->base.screen = screen; + i915->base.priv = priv; i915->base.destroy = i915_destroy; diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 234b441ce6e..1479d2201a9 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -167,7 +167,7 @@ struct i915_depth_stencil_state { }; struct i915_rasterizer_state { - int light_twoside : 1; + unsigned light_twoside : 1; unsigned st; enum interp_mode color_interp; @@ -233,7 +233,8 @@ struct i915_context struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + /* XXX unneded */ + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; @@ -333,6 +334,11 @@ void i915_init_flush_functions( struct i915_context *i915 ); void i915_init_string_functions( struct i915_context *i915 ); +/************************************************************************ + * i915_context.c + */ +struct pipe_context *i915_create_context(struct pipe_screen *screen, + void *priv); /*********************************************************************** diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index c6e6d6fd313..237654d26b2 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -29,7 +29,6 @@ #include "i915_context.h" #include "i915_debug.h" #include "i915_batch.h" -#include "pipe/internal/p_winsys_screen.h" #include "util/u_debug.h" diff --git a/src/gallium/drivers/i915/i915_debug.h 
b/src/gallium/drivers/i915/i915_debug.h index dd9b86e17b5..8f7484797de 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -72,7 +72,7 @@ void i915_print_ureg(const char *msg, unsigned ureg); #if defined(DEBUG) && defined(FILE_DEBUG_FLAG) -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" static INLINE void I915_DBG( diff --git a/src/gallium/drivers/i915/i915_debug_fp.c b/src/gallium/drivers/i915/i915_debug_fp.c index 9c5b117b6dd..066e7392d18 100644 --- a/src/gallium/drivers/i915/i915_debug_fp.c +++ b/src/gallium/drivers/i915/i915_debug_fp.c @@ -28,8 +28,8 @@ #include "i915_reg.h" #include "i915_debug.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_memory.h" +#include "util/u_simple_screen.h" +#include "util/u_debug.h" static void diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index 6b832140a87..cad4109ee6b 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -41,7 +41,7 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "util/u_debug.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_fifo.h" diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index d4ee8f5339b..c450854c982 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_string.h" @@ -117,6 +117,12 @@ i915_get_param(struct pipe_screen *screen, int param) return 8; /* max 128x128x128 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 11; /* max 1024x1024 */ + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case 
PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; default: return 0; } @@ -288,6 +294,8 @@ i915_create_screen(struct intel_winsys *iws, uint pci_id) is->base.get_paramf = i915_get_paramf; is->base.is_format_supported = i915_is_format_supported; + is->base.context_create = i915_create_context; + is->base.fence_reference = i915_fence_reference; is->base.fence_signalled = i915_fence_signalled; is->base.fence_finish = i915_fence_finish; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 5f5b6f8e185..beb26e996a4 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -30,15 +30,13 @@ #include "draw/draw_context.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "i915_context.h" #include "i915_reg.h" -#include "i915_state.h" #include "i915_state_inlines.h" #include "i915_fpc.h" @@ -105,13 +103,13 @@ i915_create_blend_state(struct pipe_context *pipe, struct i915_blend_state *cso_data = CALLOC_STRUCT( i915_blend_state ); { - unsigned eqRGB = blend->rgb_func; - unsigned srcRGB = blend->rgb_src_factor; - unsigned dstRGB = blend->rgb_dst_factor; + unsigned eqRGB = blend->rt[0].rgb_func; + unsigned srcRGB = blend->rt[0].rgb_src_factor; + unsigned dstRGB = blend->rt[0].rgb_dst_factor; - unsigned eqA = blend->alpha_func; - unsigned srcA = blend->alpha_src_factor; - unsigned dstA = blend->alpha_dst_factor; + unsigned eqA = blend->rt[0].alpha_func; + unsigned srcA = blend->rt[0].alpha_src_factor; + unsigned dstA = blend->rt[0].alpha_dst_factor; /* Special handling for MIN/MAX filter modes handled at * state_tracker level. 
@@ -148,22 +146,22 @@ i915_create_blend_state(struct pipe_context *pipe, if (blend->dither) cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; - if ((blend->colormask & PIPE_MASK_R) == 0) + if ((blend->rt[0].colormask & PIPE_MASK_R) == 0) cso_data->LIS5 |= S5_WRITEDISABLE_RED; - if ((blend->colormask & PIPE_MASK_G) == 0) + if ((blend->rt[0].colormask & PIPE_MASK_G) == 0) cso_data->LIS5 |= S5_WRITEDISABLE_GREEN; - if ((blend->colormask & PIPE_MASK_B) == 0) + if ((blend->rt[0].colormask & PIPE_MASK_B) == 0) cso_data->LIS5 |= S5_WRITEDISABLE_BLUE; - if ((blend->colormask & PIPE_MASK_A) == 0) + if ((blend->rt[0].colormask & PIPE_MASK_A) == 0) cso_data->LIS5 |= S5_WRITEDISABLE_ALPHA; - if (blend->blend_enable) { - unsigned funcRGB = blend->rgb_func; - unsigned srcRGB = blend->rgb_src_factor; - unsigned dstRGB = blend->rgb_dst_factor; + if (blend->rt[0].blend_enable) { + unsigned funcRGB = blend->rt[0].rgb_func; + unsigned srcRGB = blend->rt[0].rgb_src_factor; + unsigned dstRGB = blend->rt[0].rgb_dst_factor; cso_data->LIS6 |= (S6_CBUF_BLEND_ENABLE | SRC_BLND_FACT(i915_translate_blend_factor(srcRGB)) | @@ -518,7 +516,7 @@ static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) static void i915_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct i915_context *i915 = i915_context(pipe); struct pipe_screen *screen = pipe->screen; @@ -538,13 +536,13 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, */ if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(screen, buf->buffer, + if (buf->size && + (mapped = pipe_buffer_map(screen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(i915->current.constants[shader], mapped, buf->buffer->size); - pipe_buffer_unmap(screen, buf->buffer); + memcpy(i915->current.constants[shader], mapped, buf->size); + pipe_buffer_unmap(screen, buf); i915->current.num_user_constants[shader] - = 
buf->buffer->size / (4 * sizeof(float)); + = buf->size / (4 * sizeof(float)); } else { i915->current.num_user_constants[shader] = 0; diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 03dd5091a61..f5b0e9f011e 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -33,7 +33,6 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" -#include "i915_fpc.h" diff --git a/src/gallium/drivers/i915/i915_state_inlines.h b/src/gallium/drivers/i915/i915_state_inlines.h index 378de8f9c48..b589117fbfe 100644 --- a/src/gallium/drivers/i915/i915_state_inlines.h +++ b/src/gallium/drivers/i915/i915_state_inlines.h @@ -30,6 +30,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_defines.h" +#include "util/u_debug.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index cbac4175c8f..e5c6d87215b 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -27,7 +27,6 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "util/u_memory.h" #include "i915_state_inlines.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index c693eb30e87..1ff6b9f4c63 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -27,14 +27,8 @@ #include "i915_context.h" #include "i915_blit.h" -#include "i915_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" #include "util/u_format.h" -#include "util/u_tile.h" -#include "util/u_rect.h" /* Assumes all values are within bounds -- no checking at this level - diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index 50a9e19094b..e101c8683ec 
100644 --- a/src/gallium/drivers/i915/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -33,15 +33,13 @@ #include "pipe/p_state.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "i915_context.h" #include "i915_texture.h" -#include "i915_debug.h" #include "i915_screen.h" #include "intel_winsys.h" diff --git a/src/gallium/drivers/i915/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h index c6bf6e6f7f1..b3a802b0e29 100644 --- a/src/gallium/drivers/i915/intel_winsys.h +++ b/src/gallium/drivers/i915/intel_winsys.h @@ -203,10 +203,6 @@ struct intel_winsys { */ struct pipe_screen *i915_create_screen(struct intel_winsys *iws, unsigned pci_id); -/** - * Create a i915 pipe_context. - */ -struct pipe_context *i915_create_context(struct pipe_screen *screen); /** * Get the intel_winsys buffer backing the texture. diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 22607dc6083..003b1fd5bf0 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -155,7 +155,7 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, enum pipe_error brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, struct brw_winsys_buffer *buffer, - uint32_t usage, + enum brw_buffer_usage usage, uint32_t delta) { int ret; diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 7473f5bea4d..6ca9f617f5e 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -64,12 +64,12 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch); * Consider it a convenience function wrapping multple * intel_buffer_dword() calls. 
*/ -int brw_batchbuffer_data(struct brw_batchbuffer *batch, +enum pipe_error brw_batchbuffer_data(struct brw_batchbuffer *batch, const void *data, GLuint bytes, enum cliprect_mode cliprect_mode); -int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, +enum pipe_error brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, struct brw_winsys_buffer *buffer, enum brw_buffer_usage usage, uint32_t offset); diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 3e070f5591a..4a543276f54 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -32,7 +32,6 @@ #include "brw_context.h" #include "brw_state.h" -#include "brw_defines.h" static enum pipe_error prepare_cc_vp( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index d67a1a62633..ccba205e8c7 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -38,7 +38,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_state.h" #include "brw_pipe_rast.h" #include "brw_clip.h" diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index 54282d975ed..66caadc4d53 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -33,7 +33,6 @@ #include "brw_defines.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c index e0a5330556d..124156c1b50 100644 --- a/src/gallium/drivers/i965/brw_clip_point.c +++ b/src/gallium/drivers/i965/brw_clip_point.c @@ -31,7 +31,6 @@ #include "brw_defines.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 4cde7294ea0..069524bc14f 100644 --- 
a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -31,7 +31,6 @@ #include "brw_defines.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" static void release_tmps( struct brw_clip_compile *c ) diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 97a57103105..23e51ee9bcd 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -32,7 +32,6 @@ #include "brw_defines.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index e67551882dc..3dbe2b91308 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -31,10 +31,10 @@ #include "pipe/p_context.h" +#include "util/u_inlines.h" #include "util/u_simple_list.h" #include "brw_context.h" -#include "brw_defines.h" #include "brw_draw.h" #include "brw_state.h" #include "brw_batchbuffer.h" @@ -102,7 +102,8 @@ static void brw_destroy_context( struct pipe_context *pipe ) } -struct pipe_context *brw_create_context(struct pipe_screen *screen) +struct pipe_context *brw_create_context(struct pipe_screen *screen, + void *priv) { struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); @@ -112,6 +113,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) } brw->base.screen = screen; + brw->base.priv = priv; brw->base.destroy = brw_destroy_context; brw->sws = brw_screen(screen)->sws; brw->chipset = brw_screen(screen)->chipset; diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 8c006bb95b2..19fda423de0 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -837,6 +837,10 @@ int brw_upload_urb_fence(struct brw_context *brw); */ int brw_upload_cs_urb_state(struct brw_context *brw); +/* brw_context.c + */ 
+struct pipe_context *brw_create_context(struct pipe_screen *screen, + void *priv); /*====================================================================== * Inline conversion functions. These are better-typed than the diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 3f031577d5a..4b215a001c4 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -36,9 +36,7 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" -#include "brw_util.h" #include "brw_debug.h" -#include "brw_screen.h" /** diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 65db27248b1..9c2ccfff652 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -366,6 +366,7 @@ static int format (FILE *f, char *format, ...) va_start (args, format); vsnprintf (buf, sizeof (buf) - 1, format, args); + va_end (args); string (f, buf); return 0; } diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index ea8d39adaf0..9bad61ef72e 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -26,6 +26,7 @@ **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_upload_mgr.h" @@ -34,7 +35,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_debug.h" -#include "brw_screen.h" #include "brw_batchbuffer.h" diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index a27da5f1c17..d59261557b5 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "pipe/p_context.h" +#include "util/u_inlines.h" #include "util/u_upload_mgr.h" #include "util/u_math.h" diff --git 
a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 921b201bae2..06826635a8a 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -34,7 +34,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_state.h" #include "brw_gs.h" diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c index fd8e2accedd..9b58773b3bd 100644 --- a/src/gallium/drivers/i965/brw_gs_emit.c +++ b/src/gallium/drivers/i965/brw_gs_emit.c @@ -35,7 +35,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_gs.h" static void brw_gs_alloc_regs( struct brw_gs_compile *c, diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c index b759a910b63..21f786f8715 100644 --- a/src/gallium/drivers/i965/brw_pipe_blend.c +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -118,14 +118,14 @@ static void *brw_create_blend_state( struct pipe_context *pipe, blend->cc2.logicop_enable = 1; blend->cc5.logicop_func = translate_logicop(templ->logicop_func); } - else if (templ->blend_enable) { - blend->cc6.dest_blend_factor = translate_blend_factor(templ->rgb_dst_factor); - blend->cc6.src_blend_factor = translate_blend_factor(templ->rgb_src_factor); - blend->cc6.blend_function = translate_blend_equation(templ->rgb_func); + else if (templ->rt[0].blend_enable) { + blend->cc6.dest_blend_factor = translate_blend_factor(templ->rt[0].rgb_dst_factor); + blend->cc6.src_blend_factor = translate_blend_factor(templ->rt[0].rgb_src_factor); + blend->cc6.blend_function = translate_blend_equation(templ->rt[0].rgb_func); - blend->cc5.ia_dest_blend_factor = translate_blend_factor(templ->alpha_dst_factor); - blend->cc5.ia_src_blend_factor = translate_blend_factor(templ->alpha_src_factor); - blend->cc5.ia_blend_function = translate_blend_equation(templ->alpha_func); + 
blend->cc5.ia_dest_blend_factor = translate_blend_factor(templ->rt[0].alpha_dst_factor); + blend->cc5.ia_src_blend_factor = translate_blend_factor(templ->rt[0].alpha_src_factor); + blend->cc5.ia_blend_function = translate_blend_equation(templ->rt[0].alpha_func); blend->cc3.blend_enable = 1; blend->cc3.ia_blend_enable = @@ -146,10 +146,10 @@ static void *brw_create_blend_state( struct pipe_context *pipe, /* Per-surface color mask -- just follow global state: */ - blend->ss0.writedisable_red = (templ->colormask & PIPE_MASK_R) ? 0 : 1; - blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 0 : 1; - blend->ss0.writedisable_blue = (templ->colormask & PIPE_MASK_B) ? 0 : 1; - blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 0 : 1; + blend->ss0.writedisable_red = (templ->rt[0].colormask & PIPE_MASK_R) ? 0 : 1; + blend->ss0.writedisable_green = (templ->rt[0].colormask & PIPE_MASK_G) ? 0 : 1; + blend->ss0.writedisable_blue = (templ->rt[0].colormask & PIPE_MASK_B) ? 0 : 1; + blend->ss0.writedisable_alpha = (templ->rt[0].colormask & PIPE_MASK_A) ? 
0 : 1; return (void *)blend; } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index 5d4e5025f97..a90b7c73f69 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -1,9 +1,9 @@ #include "util/u_math.h" #include "pipe/p_context.h" #include "pipe/p_state.h" +#include "util/u_inlines.h" #include "brw_context.h" -#include "brw_debug.h" /** * called from intelDrawBuffer() diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index 81712798a5d..6aab5610043 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -4,10 +4,10 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" +#include "util/u_inlines.h" #include "brw_context.h" #include "brw_defines.h" -#include "brw_debug.h" diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index bb32d90e331..fe445b9982e 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -29,13 +29,13 @@ * Keith Whitwell <[email protected]> */ +#include "util/u_inlines.h" #include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" #include "brw_context.h" -#include "brw_util.h" #include "brw_wm.h" @@ -262,7 +262,7 @@ static void brw_delete_vs_state( struct pipe_context *pipe, void *prog ) static void brw_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct brw_context *brw = brw_context(pipe); @@ -270,13 +270,13 @@ static void brw_set_constant_buffer(struct pipe_context *pipe, if (shader == PIPE_SHADER_FRAGMENT) { pipe_buffer_reference( &brw->curr.fragment_constants, - buf->buffer ); + buf ); brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS; } else { pipe_buffer_reference( &brw->curr.vertex_constants, - buf->buffer ); + buf ); 
brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS; } diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 0ecacac9a3a..184cd490e55 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_string.h" @@ -138,6 +138,9 @@ brw_get_name(struct pipe_screen *screen) case PCI_CHIP_ILM_G: chipset = "ILM_G"; break; + default: + chipset = "unknown"; + break; } util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); @@ -172,6 +175,12 @@ brw_get_param(struct pipe_screen *screen, int param) return 8; /* max 128x128x128 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 11; /* max 1024x1024 */ + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; default: return 0; } @@ -388,6 +397,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) bscreen->base.get_param = brw_get_param; bscreen->base.get_paramf = brw_get_paramf; bscreen->base.is_format_supported = brw_is_format_supported; + bscreen->base.context_create = brw_create_context; bscreen->base.fence_reference = brw_fence_reference; bscreen->base.fence_signalled = brw_fence_signalled; bscreen->base.fence_finish = brw_fence_finish; diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c index d8141a3f5b9..0b38885f40c 100644 --- a/src/gallium/drivers/i965/brw_screen_buffers.c +++ b/src/gallium/drivers/i965/brw_screen_buffers.c @@ -4,7 +4,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "brw_screen.h" #include "brw_winsys.h" diff --git 
a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index fc3102b5318..9cceb4dbe52 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -36,7 +36,6 @@ #include "brw_context.h" #include "brw_pipe_rast.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_sf.h" #include "brw_state.h" diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index 3b85725e368..497634ec9ed 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -35,7 +35,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_sf.h" diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 16b643ceb28..85c20076fb8 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -59,7 +59,6 @@ #include "brw_debug.h" #include "brw_state.h" -#include "brw_batchbuffer.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c index 458058d668d..1fd2e297137 100644 --- a/src/gallium/drivers/i965/brw_util.c +++ b/src/gallium/drivers/i965/brw_util.c @@ -30,8 +30,6 @@ */ -#include "brw_util.h" -#include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index e3ea5a3a135..ca8ee79550d 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -33,9 +33,7 @@ #include "brw_context.h" #include "brw_vs.h" -#include "brw_util.h" #include "brw_state.h" -#include "brw_pipe_rast.h" diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c index 177a5170d23..004e3cb4e6f 100644 --- a/src/gallium/drivers/i965/brw_vs_surface_state.c +++ b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -31,7 +31,6 
@@ #include "brw_context.h" #include "brw_state.h" -#include "brw_defines.h" #include "brw_winsys.h" /* XXX: disabled true constant buffer functionality diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index a242e31218a..c82d00f4a47 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -28,7 +28,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_defines.h" -#include "pipe/p_refcnt.h" +#include "util/u_inlines.h" struct brw_winsys; struct pipe_fence_handle; @@ -256,10 +256,6 @@ bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf) */ struct pipe_screen *brw_create_screen(struct brw_winsys_screen *iws, unsigned pci_id); -/** - * Create a brw pipe_context. - */ -struct pipe_context *brw_create_context(struct pipe_screen *screen); /** * Get the brw_winsys buffer backing the texture. diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index fdf820a9aae..5164c90ed60 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -32,7 +32,6 @@ #include "brw_context.h" #include "brw_screen.h" -#include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" #include "brw_debug.h" diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 9c5b527f897..9c67759ad0b 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -41,7 +41,6 @@ #include "tgsi/tgsi_util.h" #include "brw_wm.h" -#include "brw_util.h" #include "brw_debug.h" diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index f92b8198ed3..b01a7f194b7 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -34,7 +34,6 @@ #include "brw_batchbuffer.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_defines.h" #include "brw_screen.h" diff --git 
a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 9f5b4e63236..9955380e1fe 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -29,7 +29,6 @@ #include "pipe/p_context.h" #include "util/u_memory.h" -#include "id_public.h" #include "id_context.h" #include "id_objects.h" @@ -404,17 +403,17 @@ static void identity_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_constant_buffer *_buffer) + struct pipe_buffer *_buffer) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - struct pipe_constant_buffer unwrapped_buffer; - struct pipe_constant_buffer *buffer = NULL; + struct pipe_buffer *unwrapped_buffer; + struct pipe_buffer *buffer = NULL; - /* unwrap the input state */ + /* XXX hmm? unwrap the input state */ if (_buffer) { - unwrapped_buffer.buffer = identity_buffer_unwrap(_buffer->buffer); - buffer = &unwrapped_buffer; + unwrapped_buffer = identity_buffer_unwrap(_buffer); + buffer = unwrapped_buffer; } pipe->set_constant_buffer(pipe, @@ -692,7 +691,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.winsys = NULL; id_pipe->base.screen = _screen; - id_pipe->base.priv = pipe->priv; + id_pipe->base.priv = pipe->priv; /* expose wrapped data */ id_pipe->base.draw = NULL; id_pipe->base.destroy = identity_destroy; diff --git a/src/gallium/drivers/identity/id_context.h b/src/gallium/drivers/identity/id_context.h index 75b73fc7df6..6d3c1899d59 100644 --- a/src/gallium/drivers/identity/id_context.h +++ b/src/gallium/drivers/identity/id_context.h @@ -39,6 +39,10 @@ struct identity_context { }; +struct pipe_context * +identity_context_create(struct pipe_screen *screen, struct pipe_context *pipe); + + static INLINE struct identity_context * identity_context(struct pipe_context *pipe) { diff --git a/src/gallium/drivers/identity/id_drm.c 
b/src/gallium/drivers/identity/id_drm.c index 14f68ac0d00..12b516b445f 100644 --- a/src/gallium/drivers/identity/id_drm.c +++ b/src/gallium/drivers/identity/id_drm.c @@ -63,22 +63,6 @@ identity_drm_create_screen(struct drm_api *_api, int fd, return identity_screen_create(screen); } -static struct pipe_context * -identity_drm_create_context(struct drm_api *_api, - struct pipe_screen *_screen) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_drm_api *id_api = identity_drm_api(_api); - struct pipe_screen *screen = id_screen->screen; - struct drm_api *api = id_api->api; - struct pipe_context *pipe; - - pipe = api->create_context(api, screen); - - pipe = identity_context_create(_screen, pipe); - - return pipe; -} static struct pipe_texture * identity_drm_texture_from_shared_handle(struct drm_api *_api, @@ -159,7 +143,6 @@ identity_drm_create(struct drm_api *api) goto error; id_api->base.create_screen = identity_drm_create_screen; - id_api->base.create_context = identity_drm_create_context; id_api->base.texture_from_shared_handle = identity_drm_texture_from_shared_handle; id_api->base.shared_handle_from_texture = identity_drm_shared_handle_from_texture; id_api->base.local_handle_from_texture = identity_drm_local_handle_from_texture; diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index bc9bc7121d5..2b1a60c1bf1 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -25,9 +25,9 @@ * **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_memory.h" -#include "id_public.h" #include "id_screen.h" #include "id_objects.h" diff --git a/src/gallium/drivers/identity/id_public.h b/src/gallium/drivers/identity/id_public.h index 3d2862eaa01..d0d5847c61c 100644 --- a/src/gallium/drivers/identity/id_public.h +++ b/src/gallium/drivers/identity/id_public.h @@ -34,7 +34,4 @@ struct 
pipe_context; struct pipe_screen * identity_screen_create(struct pipe_screen *screen); -struct pipe_context * -identity_context_create(struct pipe_screen *screen, struct pipe_context *pipe); - #endif /* ID_PUBLIC_H */ diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index 53eae3ef544..b85492114a3 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -32,6 +32,7 @@ #include "id_public.h" #include "id_screen.h" +#include "id_context.h" #include "id_objects.h" @@ -103,6 +104,20 @@ identity_screen_is_format_supported(struct pipe_screen *_screen, geom_flags); } +static struct pipe_context * +identity_screen_context_create(struct pipe_screen *_screen, + void *priv) +{ + struct identity_screen *id_screen = identity_screen(_screen); + struct pipe_screen *screen = id_screen->screen; + struct pipe_context *result; + + result = screen->context_create(screen, priv); + if (result) + return identity_context_create(_screen, result); + return NULL; +} + static struct pipe_texture * identity_screen_texture_create(struct pipe_screen *_screen, const struct pipe_texture *templat) @@ -478,6 +493,7 @@ identity_screen_create(struct pipe_screen *screen) id_screen->base.get_param = identity_screen_get_param; id_screen->base.get_paramf = identity_screen_get_paramf; id_screen->base.is_format_supported = identity_screen_is_format_supported; + id_screen->base.context_create = identity_screen_context_create; id_screen->base.texture_create = identity_screen_texture_create; id_screen->base.texture_blanket = identity_screen_texture_blanket; id_screen->base.texture_destroy = identity_screen_texture_destroy; diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 7c6e46006b9..e880042b71e 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -3,42 +3,28 @@ include $(TOP)/configs/current LIBNAME = llvmpipe -CFLAGS += 
-D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS C_SOURCES = \ - lp_bld_alpha.c \ - lp_bld_arit.c \ - lp_bld_blend_aos.c \ - lp_bld_blend_logicop.c \ - lp_bld_blend_soa.c \ - lp_bld_const.c \ - lp_bld_conv.c \ - lp_bld_debug.c \ - lp_bld_depth.c \ - lp_bld_flow.c \ - lp_bld_format_aos.c \ - lp_bld_format_query.c \ - lp_bld_format_soa.c \ - lp_bld_interp.c \ - lp_bld_intr.c \ - lp_bld_logic.c \ - lp_bld_pack.c \ - lp_bld_sample.c \ - lp_bld_sample_soa.c \ - lp_bld_swizzle.c \ - lp_bld_struct.c \ - lp_bld_tgsi_soa.c \ - lp_bld_type.c \ lp_buffer.c \ lp_clear.c \ lp_context.c \ lp_draw_arrays.c \ + lp_fence.c \ lp_flush.c \ lp_jit.c \ - lp_prim_vbuf.c \ - lp_setup.c \ + lp_perf.c \ lp_query.c \ + lp_rast.c \ + lp_rast_tri.c \ + lp_scene.c \ + lp_scene_queue.c \ lp_screen.c \ + lp_setup.c \ + lp_setup_line.c \ + lp_setup_point.c \ + lp_setup_tri.c \ + lp_setup_vbuf.c \ lp_state_blend.c \ lp_state_clip.c \ lp_state_derived.c \ @@ -49,16 +35,32 @@ C_SOURCES = \ lp_state_vertex.c \ lp_state_vs.c \ lp_surface.c \ - lp_tex_cache.c \ lp_tex_sample_llvm.c \ lp_texture.c \ - lp_tile_cache.c \ + lp_tile_surface.c \ lp_tile_soa.c CPP_SOURCES = \ - lp_bld_misc.cpp + include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ + + +# to make a .s file to inspect assembly code +.c.s: + $(CC) -S $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< + + +testprogs := lp_test_format \ + lp_test_blend \ + lp_test_conv + +LIBS += $(GL_LIB_DEPS) -L. 
-lllvmpipe -L../../auxiliary/ -lgallium + +$(testprogs): lp_test_% : lp_test_%.o lp_test_main.o libllvmpipe.a + $(LD) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group + +default: $(testprogs) diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 6bb545a501f..a39283e5e86 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -21,40 +21,25 @@ env.CodeGenerate( llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ - 'lp_bld_alpha.c', - 'lp_bld_arit.c', - 'lp_bld_blend_aos.c', - 'lp_bld_blend_logicop.c', - 'lp_bld_blend_soa.c', - 'lp_bld_const.c', - 'lp_bld_conv.c', - 'lp_bld_debug.c', - 'lp_bld_depth.c', - 'lp_bld_flow.c', - 'lp_bld_format_aos.c', - 'lp_bld_format_query.c', - 'lp_bld_format_soa.c', - 'lp_bld_interp.c', - 'lp_bld_intr.c', - 'lp_bld_misc.cpp', - 'lp_bld_pack.c', - 'lp_bld_sample.c', - 'lp_bld_sample_soa.c', - 'lp_bld_struct.c', - 'lp_bld_logic.c', - 'lp_bld_swizzle.c', - 'lp_bld_tgsi_soa.c', - 'lp_bld_type.c', 'lp_buffer.c', 'lp_clear.c', 'lp_context.c', 'lp_draw_arrays.c', + 'lp_fence.c', 'lp_flush.c', 'lp_jit.c', - 'lp_prim_vbuf.c', - 'lp_setup.c', + 'lp_perf.c', 'lp_query.c', + 'lp_rast.c', + 'lp_rast_tri.c', + 'lp_scene.c', + 'lp_scene_queue.c', 'lp_screen.c', + 'lp_setup.c', + 'lp_setup_line.c', + 'lp_setup_point.c', + 'lp_setup_tri.c', + 'lp_setup_vbuf.c', 'lp_state_blend.c', 'lp_state_clip.c', 'lp_state_derived.c', @@ -65,29 +50,28 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vertex.c', 'lp_state_vs.c', 'lp_surface.c', - 'lp_tex_cache.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', - 'lp_tile_cache.c', 'lp_tile_soa.c', ]) -env = env.Clone() +if env['platform'] != 'embedded': + env = env.Clone() -env.Prepend(LIBS = [llvmpipe] + gallium) + env.Prepend(LIBS = [llvmpipe] + gallium) -tests = [ - 'format', - 'blend', - 'conv', -] + tests = [ + 'format', + 'blend', + 'conv', + ] -for test in tests: - target = env.Program( - target = 
'lp_test_' + test, - source = ['lp_test_' + test + '.c', 'lp_test_main.c'], - ) - env.InstallProgram(target) + for test in tests: + target = env.Program( + target = 'lp_test_' + test, + source = ['lp_test_' + test + '.c', 'lp_test_main.c'], + ) + env.InstallProgram(target) -Export('llvmpipe') + Export('llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_buffer.c b/src/gallium/drivers/llvmpipe/lp_buffer.c index 66f1f8e1383..9eda9720818 100644 --- a/src/gallium/drivers/llvmpipe/lp_buffer.c +++ b/src/gallium/drivers/llvmpipe/lp_buffer.c @@ -26,12 +26,12 @@ **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_math.h" #include "lp_winsys.h" #include "lp_screen.h" -#include "lp_texture.h" #include "lp_buffer.h" @@ -108,32 +108,6 @@ llvmpipe_user_buffer_create(struct pipe_screen *screen, } -static void -llvmpipe_fence_reference(struct pipe_screen *screen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ -} - - -static int -llvmpipe_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static int -llvmpipe_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - void llvmpipe_init_screen_buffer_funcs(struct pipe_screen *screen) { @@ -142,9 +116,4 @@ llvmpipe_init_screen_buffer_funcs(struct pipe_screen *screen) screen->buffer_map = llvmpipe_buffer_map; screen->buffer_unmap = llvmpipe_buffer_unmap; screen->buffer_destroy = llvmpipe_buffer_destroy; - - screen->fence_reference = llvmpipe_fence_reference; - screen->fence_signalled = llvmpipe_fence_signalled; - screen->fence_finish = llvmpipe_fence_finish; - } diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 08d9f2e2735..3e8c4109251 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ 
-33,12 +33,9 @@ #include "pipe/p_defines.h" -#include "util/u_pack_color.h" #include "lp_clear.h" #include "lp_context.h" -#include "lp_surface.h" -#include "lp_state.h" -#include "lp_tile_cache.h" +#include "lp_setup.h" /** @@ -46,37 +43,16 @@ * No masking, no scissor (clear entire buffer). */ void -llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil) +llvmpipe_clear(struct pipe_context *pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - union util_color uc; - unsigned cv; - uint i; if (llvmpipe->no_rast) return; -#if 0 - llvmpipe_update_derived(llvmpipe); /* not needed?? */ -#endif - - if (buffers & PIPE_CLEAR_COLOR) { - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { - struct pipe_surface *ps = llvmpipe->framebuffer.cbufs[i]; - - util_pack_color(rgba, ps->format, &uc); - lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, uc.ui); - } - llvmpipe->dirty_render_cache = TRUE; - } - - if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { - struct pipe_surface *ps = llvmpipe->framebuffer.zsbuf; - - cv = util_pack_z_stencil(ps->format, depth, stencil); - - /* non-cached surface */ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); - } + lp_setup_clear( llvmpipe->setup, rgba, depth, stencil, buffers ); } diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 1cc3c9227cc..43d610631da 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -33,70 +33,22 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "pipe/p_defines.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" -#include "lp_prim_vbuf.h" +#include "lp_perf.h" #include "lp_state.h" #include "lp_surface.h" -#include "lp_tile_cache.h" 
-#include "lp_tex_cache.h" #include "lp_texture.h" #include "lp_winsys.h" #include "lp_query.h" +#include "lp_setup.h" -/** - * Map any drawing surfaces which aren't already mapped - */ -void -llvmpipe_map_transfers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - struct pipe_surface *zsbuf = lp->framebuffer.zsbuf; - unsigned i; - - for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { - lp_tile_cache_map_transfers(lp->cbuf_cache[i]); - } - - if(zsbuf) { - if(!lp->zsbuf_transfer) - lp->zsbuf_transfer = screen->get_tex_transfer(screen, zsbuf->texture, - zsbuf->face, zsbuf->level, zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, zsbuf->width, zsbuf->height); - if(lp->zsbuf_transfer && !lp->zsbuf_map) - lp->zsbuf_map = screen->transfer_map(screen, lp->zsbuf_transfer); - - } -} - - -/** - * Unmap any mapped drawing surfaces - */ -void -llvmpipe_unmap_transfers(struct llvmpipe_context *lp) -{ - uint i; - - for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { - lp_tile_cache_unmap_transfers(lp->cbuf_cache[i]); - } - - if(lp->zsbuf_transfer) { - struct pipe_screen *screen = lp->pipe.screen; - - if(lp->zsbuf_map) { - screen->transfer_unmap(screen, lp->zsbuf_transfer); - lp->zsbuf_map = NULL; - } - } -} static void llvmpipe_destroy( struct pipe_context *pipe ) @@ -104,28 +56,30 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); uint i; + lp_print_counters(); + + /* This will also destroy llvmpipe->setup: + */ if (llvmpipe->draw) draw_destroy( llvmpipe->draw ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]); pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL); } + pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]); pipe_texture_reference(&llvmpipe->texture[i], NULL); } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - 
lp_destroy_tex_tile_cache(llvmpipe->vertex_tex_cache[i]); pipe_texture_reference(&llvmpipe->vertex_textures[i], NULL); } for (i = 0; i < Elements(llvmpipe->constants); i++) { - if (llvmpipe->constants[i].buffer) { - pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL); + if (llvmpipe->constants[i]) { + pipe_buffer_reference(&llvmpipe->constants[i], NULL); } } @@ -138,33 +92,8 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, unsigned face, unsigned level) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); - unsigned i; - - /* check if any of the bound drawing surfaces are this texture */ - if(llvmpipe->dirty_render_cache) { - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { - if(llvmpipe->framebuffer.cbufs[i] && - llvmpipe->framebuffer.cbufs[i]->texture == texture) - return PIPE_REFERENCED_FOR_WRITE; - } - if(llvmpipe->framebuffer.zsbuf && - llvmpipe->framebuffer.zsbuf->texture == texture) - return PIPE_REFERENCED_FOR_WRITE; - } - /* check if any of the tex_cache textures are this texture */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (llvmpipe->tex_cache[i] && - llvmpipe->tex_cache[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ; - } - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - if (llvmpipe->vertex_tex_cache[i] && - llvmpipe->vertex_tex_cache[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ; - } - - return PIPE_UNREFERENCED; + return lp_setup_is_texture_referenced(llvmpipe->setup, texture); } static unsigned int @@ -175,10 +104,9 @@ llvmpipe_is_buffer_referenced( struct pipe_context *pipe, } struct pipe_context * -llvmpipe_create( struct pipe_screen *screen ) +llvmpipe_create_context( struct pipe_screen *screen, void *priv ) { struct llvmpipe_context *llvmpipe; - uint i; llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16); if (!llvmpipe) @@ -190,6 +118,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; + 
llvmpipe->pipe.priv = priv; llvmpipe->pipe.destroy = llvmpipe_destroy; /* state setters */ @@ -242,19 +171,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.is_buffer_referenced = llvmpipe_is_buffer_referenced; llvmpipe_init_query_funcs( llvmpipe ); - llvmpipe_init_texture_funcs( llvmpipe ); - - /* - * Alloc caches for accessing drawing surfaces and textures. - */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - llvmpipe->cbuf_cache[i] = lp_create_tile_cache( screen ); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen ); - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) - llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen); - /* * Create drawing context and plug our rendering stage into it. @@ -268,19 +184,11 @@ llvmpipe_create( struct pipe_screen *screen ) if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); - if (!llvmpipe->vbuf_backend) - goto fail; - - llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend); - if (!llvmpipe->vbuf) + llvmpipe->setup = lp_setup_create( screen, + llvmpipe->draw ); + if (!llvmpipe->setup) goto fail; - draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf); - draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend); - - - /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe); @@ -292,6 +200,8 @@ llvmpipe_create( struct pipe_screen *screen ) lp_init_surface_functions(llvmpipe); + lp_reset_counters(); + return &llvmpipe->pipe; fail: diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 6411797cf5d..3bde485ac0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -42,12 +42,10 @@ struct llvmpipe_vbuf_render; struct draw_context; struct draw_stage; -struct 
llvmpipe_tile_cache; -struct llvmpipe_tex_tile_cache; struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; - +struct setup_context; struct llvmpipe_context { struct pipe_context pipe; /**< base class */ @@ -62,9 +60,9 @@ struct llvmpipe_context { const struct lp_vertex_shader *vs; /** Other rendering state */ - struct pipe_blend_color blend_color[4][16]; + struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; @@ -94,52 +92,26 @@ struct llvmpipe_context { /** Vertex format */ struct vertex_info vertex_info; - struct vertex_info vertex_info_vbuf; /** Which vertex shader output slot contains point size */ int psize_slot; - /* The reduced version of the primitive supplied by the state - * tracker. - */ - unsigned reduced_api_prim; - - /* The reduced primitive after unfilled triangles, wide-line - * decomposition, etc, are taken into account. This is the - * primitive actually rasterized. 
- */ - unsigned reduced_prim; - - /** Derived from scissor and surface bounds: */ - struct pipe_scissor_state cliprect; - - unsigned line_stipple_counter; + /** The tiling engine */ + struct setup_context *setup; /** The primitive drawing context */ struct draw_context *draw; - /** Draw module backend */ - struct vbuf_render *vbuf_backend; - struct draw_stage *vbuf; - - boolean dirty_render_cache; - - struct llvmpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; - - /* TODO: we shouldn't be using external interfaces internally like this */ - struct pipe_transfer *zsbuf_transfer; - uint8_t *zsbuf_map; - unsigned tex_timestamp; - struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; - struct llvmpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS]; + boolean no_rast; - unsigned no_rast : 1; - - struct lp_jit_context jit_context; }; +struct pipe_context * +llvmpipe_create_context( struct pipe_screen *screen, void *priv ); + + static INLINE struct llvmpipe_context * llvmpipe_context( struct pipe_context *pipe ) { diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 74b27574942..ee818143610 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -45,6 +45,11 @@ st_print_current(void); #define DEBUG_QUERY 0x40 #define DEBUG_SCREEN 0x80 #define DEBUG_JIT 0x100 +#define DEBUG_SHOW_TILES 0x200 +#define DEBUG_SHOW_SUBTILES 0x400 +#define DEBUG_COUNTERS 0x800 +#define DEBUG_NO_LLVM_OPT 0x1000 + #ifdef DEBUG extern int LP_DEBUG; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index c152b4413fc..3dd68d5794e 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -33,8 +33,6 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" #include "util/u_prim.h" #include "lp_buffer.h" @@ 
-70,13 +68,9 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, struct draw_context *draw = lp->draw; unsigned i; - lp->reduced_api_prim = u_reduced_prim(mode); - if (lp->dirty) llvmpipe_update_derived( lp ); - llvmpipe_map_transfers(lp); - /* * Map vertex buffers */ @@ -118,10 +112,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, * internally when this condition is seen?) */ draw_flush(draw); - - /* Note: leave drawing surfaces mapped */ - - lp->dirty_render_cache = TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c new file mode 100644 index 00000000000..525c117f316 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "lp_fence.h" + + +struct lp_fence * +lp_fence_create(unsigned rank) +{ + struct lp_fence *fence = CALLOC_STRUCT(lp_fence); + + pipe_reference_init(&fence->reference, 1); + + pipe_mutex_init(fence->mutex); + pipe_condvar_init(fence->signalled); + + fence->rank = rank; + + return fence; +} + + +static void +lp_fence_destroy(struct lp_fence *fence) +{ + pipe_mutex_destroy(fence->mutex); + pipe_condvar_destroy(fence->signalled); + FREE(fence); +} + + +static void +llvmpipe_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct lp_fence *old = (struct lp_fence *) *ptr; + struct lp_fence *f = (struct lp_fence *) fence; + + if (pipe_reference(&old->reference, &f->reference)) { + lp_fence_destroy(old); + } +} + + +static int +llvmpipe_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct lp_fence *f = (struct lp_fence *) fence; + + return f->count == f->rank; +} + + +static int +llvmpipe_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + unsigned flag) +{ + struct lp_fence *fence = (struct lp_fence *) fence_handle; + + pipe_mutex_lock(fence->mutex); + while (fence->count < fence->rank) { + pipe_condvar_wait(fence->signalled, fence->mutex); + } + pipe_mutex_unlock(fence->mutex); + + return 0; +} + + + + +void +llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen) +{ + screen->fence_reference = llvmpipe_fence_reference; + screen->fence_signalled = llvmpipe_fence_signalled; + screen->fence_finish = llvmpipe_fence_finish; +} diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h new file mode 100644 index 00000000000..c90e6de423b --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_fence.h @@ -0,0 
+1,60 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_FENCE_H +#define LP_FENCE_H + + +#include "os/os_thread.h" +#include "pipe/p_state.h" + + +struct pipe_screen; + + +struct lp_fence +{ + struct pipe_reference reference; + + pipe_mutex mutex; + pipe_condvar signalled; + + unsigned rank; + unsigned count; +}; + + +struct lp_fence * +lp_fence_create(unsigned rank); + + +void +llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen); + + +#endif /* LP_FENCE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index cd8381fe308..bf832433be1 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -34,11 +34,7 @@ #include "draw/draw_context.h" #include "lp_flush.h" #include "lp_context.h" -#include "lp_surface.h" -#include "lp_state.h" -#include "lp_tile_cache.h" -#include "lp_tex_cache.h" -#include "lp_winsys.h" +#include "lp_setup.h" void @@ -47,56 +43,52 @@ llvmpipe_flush( struct pipe_context *pipe, struct pipe_fence_handle **fence ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - uint i; draw_flush(llvmpipe->draw); - if (flags & PIPE_FLUSH_SWAPBUFFERS) { - /* If this is a swapbuffers, just flush color buffers. - * - * The zbuffer changes are not discarded, but held in the cache - * in the hope that a later clear will wipe them out. - */ - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) { - lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); - lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); - } + if (fence) { + if ((flags & (PIPE_FLUSH_SWAPBUFFERS | + PIPE_FLUSH_RENDER_CACHE))) { + /* if we're going to flush the setup/rasterization modules, emit + * a fence. + * XXX this (and the code below) may need fine tuning... + */ + *fence = lp_setup_fence( llvmpipe->setup ); + } + else { + *fence = NULL; + } + } - /* Need this call for hardware buffers before swapbuffers. 
- * - * there should probably be another/different flush-type function - * that's called before swapbuffers because we don't always want - * to unmap surfaces when flushing. - */ - llvmpipe_unmap_transfers(llvmpipe); + /* XXX the lp_setup_flush(flags) param is not a bool, and it's ignored + * at this time! + */ + if (flags & PIPE_FLUSH_SWAPBUFFERS) { + lp_setup_flush( llvmpipe->setup, FALSE ); } else if (flags & PIPE_FLUSH_RENDER_CACHE) { - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) { - lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); - lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); - } - - /* FIXME: untile zsbuf! */ - - llvmpipe->dirty_render_cache = FALSE; + lp_setup_flush( llvmpipe->setup, TRUE ); } /* Enable to dump BMPs of the color/depth buffers each frame */ #if 0 - if(flags & PIPE_FLUSH_FRAME) { + if (flags & PIPE_FLUSH_FRAME) { static unsigned frame_no = 1; - static char filename[256]; - util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, llvmpipe->framebuffer.cbufs[0]); - util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, llvmpipe->framebuffer.zsbuf); + char filename[256]; + unsigned i; + + for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); + debug_dump_surface(filename, llvmpipe->framebuffer.cbufs[i]); + } + + if (0) { + util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); + debug_dump_surface(filename, llvmpipe->framebuffer.zsbuf); + } + ++frame_no; } #endif - - if (fence) - *fence = NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 4ef0783f3e2..27b54c59598 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -37,9 +37,10 @@ #include "util/u_memory.h" #include "util/u_cpu_detect.h" +#include "lp_debug.h" #include 
"lp_screen.h" -#include "lp_bld_intr.h" -#include "lp_bld_misc.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_misc.h" #include "lp_jit.h" @@ -79,13 +80,16 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_context */ { - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[8]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ - elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ - elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ - elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ elem_types[2] = LLVMFloatType(); /* scissor_xmin */ + elem_types[3] = LLVMFloatType(); /* scissor_ymin */ + elem_types[4] = LLVMFloatType(); /* scissor_xmax */ + elem_types[5] = LLVMFloatType(); /* scissor_ymax */ + elem_types[6] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[7] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -93,8 +97,16 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->target, context_type, 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, screen->target, context_type, 1); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin, screen->target, context_type, 2); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin, + screen->target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax, + screen->target, context_type, 4); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax, + screen->target, context_type, 5); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, + screen->target, context_type, 6); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, screen->target, context_type, LP_JIT_CONTEXT_TEXTURES_INDEX); @@ -154,20 +166,23 
@@ lp_jit_screen_init(struct llvmpipe_screen *screen) screen->pass = LLVMCreateFunctionPassManager(screen->provider); LLVMAddTargetData(screen->target, screen->pass); - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. */ - /* TODO: Add more passes */ - LLVMAddConstantPropagationPass(screen->pass); - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. - */ - LLVMAddInstructionCombiningPass(screen->pass); + + if ((LP_DEBUG & DEBUG_NO_LLVM_OPT) == 0) { + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddConstantPropagationPass(screen->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(screen->pass); + } + LLVMAddPromoteMemoryToRegisterPass(screen->pass); + LLVMAddGVNPass(screen->pass); + LLVMAddCFGSimplificationPass(screen->pass); } - LLVMAddPromoteMemoryToRegisterPass(screen->pass); - LLVMAddGVNPass(screen->pass); - LLVMAddCFGSimplificationPass(screen->pass); lp_jit_init_globals(screen); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 277b690c02c..8df3015d4b4 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -36,7 +36,7 @@ #define LP_JIT_H -#include "lp_bld_struct.h" +#include "gallivm/lp_bld_struct.h" #include "pipe/p_state.h" @@ -79,6 +79,9 @@ struct lp_jit_context float alpha_ref_value; + /** floats, not ints */ + float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax; + /* FIXME: store (also?) 
in floats */ uint8_t *blend_color; @@ -92,25 +95,43 @@ struct lp_jit_context #define lp_jit_context_alpha_ref_value(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value") +#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 2, "scissor_xmin") + +#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 3, "scissor_ymin") + +#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 4, "scissor_xmax") + +#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 5, "scissor_ymax") + #define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "blend_color") + lp_build_struct_get(_builder, _ptr, 6, "blend_color") -#define LP_JIT_CONTEXT_TEXTURES_INDEX 3 +#define LP_JIT_CONTEXT_TEXTURES_INDEX 7 #define lp_jit_context_textures(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") typedef void -(*lp_jit_frag_func)(struct lp_jit_context *context, +(*lp_jit_frag_func)(const struct lp_jit_context *context, uint32_t x, uint32_t y, const void *a0, const void *dadx, const void *dady, - uint32_t *mask, - void *color, - void *depth); + uint8_t **color, + void *depth, + const int32_t c1, + const int32_t c2, + const int32_t c3, + const int32_t *step1, + const int32_t *step2, + const int32_t *step3); + void lp_jit_screen_cleanup(struct llvmpipe_screen *screen); diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c new file mode 100644 index 00000000000..a316597675c --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_perf.c @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_debug.h" +#include "lp_debug.h" +#include "lp_perf.h" + + + +struct lp_counters lp_count; + + +void +lp_reset_counters(void) +{ + memset(&lp_count, 0, sizeof(lp_count)); +} + + +void +lp_print_counters(void) +{ + if (LP_DEBUG & DEBUG_COUNTERS) { + unsigned total_64, total_16, total_4; + float p1, p2, p3; + + debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); + debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); + + total_64 = (lp_count.nr_empty_64 + + lp_count.nr_fully_covered_64 + + lp_count.nr_partially_covered_64); + + p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64; + p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64; + p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64; + + debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); + debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64); + debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); + + total_16 = (lp_count.nr_empty_16 + + lp_count.nr_fully_covered_16 + + lp_count.nr_partially_covered_16); + + p1 = 100.0 * (float) lp_count.nr_empty_16 / (float) total_16; + p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16; + p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16; + + debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); + debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16); + debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); + + total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4); + + p1 = 100.0 * (float) 
lp_count.nr_empty_4 / (float) total_4; + p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4; + + debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); + debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); + + debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear); + debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load); + debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store); + + debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles); + debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0); + debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles); + + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h new file mode 100644 index 00000000000..a9629dae3c7 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Performance / statistic counters, etc. + */ + + +#ifndef LP_PERF_H +#define LP_PERF_H + + +/** + * Various counters + */ +struct lp_counters +{ + unsigned nr_tris; + unsigned nr_culled_tris; + unsigned nr_empty_64; + unsigned nr_fully_covered_64; + unsigned nr_partially_covered_64; + unsigned nr_empty_16; + unsigned nr_fully_covered_16; + unsigned nr_partially_covered_16; + unsigned nr_empty_4; + unsigned nr_non_empty_4; + unsigned nr_llvm_compiles; + int64_t llvm_compile_time; /**< total, in microseconds */ + + unsigned nr_color_tile_clear; + unsigned nr_color_tile_load; + unsigned nr_color_tile_store; +}; + + +extern struct lp_counters lp_count; + + +/** Increment the named counter (only for debug builds) */ +#ifdef DEBUG +#define LP_COUNT(counter) lp_count.counter++ +#define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr) +#else +#define LP_COUNT(counter) +#define LP_COUNT_ADD(counter, incr) (void) incr +#endif + + +extern void +lp_reset_counters(void); + + +extern void +lp_print_counters(void); + + +#endif /* LP_PERF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c deleted file mode 100644 index e8e2e2524ac..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ /dev/null @@ -1,563 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., 
Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Interface between 'draw' module's output and the llvmpipe rasterizer/setup - * code. When the 'draw' module has finished filling a vertex buffer, the - * draw_arrays() functions below will be called. Loop over the vertices and - * call the point/line/tri setup functions. - * - * Authors - * Brian Paul - */ - - -#include "lp_context.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "lp_prim_vbuf.h" -#include "draw/draw_context.h" -#include "draw/draw_vbuf.h" -#include "util/u_memory.h" -#include "util/u_prim.h" - - -#define LP_MAX_VBUF_INDEXES 1024 -#define LP_MAX_VBUF_SIZE 4096 - -typedef const float (*cptrf4)[4]; - -/** - * Subclass of vbuf_render. 
- */ -struct llvmpipe_vbuf_render -{ - struct vbuf_render base; - struct llvmpipe_context *llvmpipe; - struct setup_context *setup; - - uint prim; - uint vertex_size; - uint nr_vertices; - uint vertex_buffer_size; - void *vertex_buffer; -}; - - -/** cast wrapper */ -static struct llvmpipe_vbuf_render * -llvmpipe_vbuf_render(struct vbuf_render *vbr) -{ - return (struct llvmpipe_vbuf_render *) vbr; -} - - - - - - - -static const struct vertex_info * -lp_vbuf_get_vertex_info(struct vbuf_render *vbr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - return llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe); -} - - -static boolean -lp_vbuf_allocate_vertices(struct vbuf_render *vbr, - ushort vertex_size, ushort nr_vertices) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - unsigned size = vertex_size * nr_vertices; - - if (cvbr->vertex_buffer_size < size) { - align_free(cvbr->vertex_buffer); - cvbr->vertex_buffer = align_malloc(size, 16); - cvbr->vertex_buffer_size = size; - } - - cvbr->vertex_size = vertex_size; - cvbr->nr_vertices = nr_vertices; - - return cvbr->vertex_buffer != NULL; -} - -static void -lp_vbuf_release_vertices(struct vbuf_render *vbr) -{ - /* keep the old allocation for next time */ -} - -static void * -lp_vbuf_map_vertices(struct vbuf_render *vbr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - return cvbr->vertex_buffer; -} - -static void -lp_vbuf_unmap_vertices(struct vbuf_render *vbr, - ushort min_index, - ushort max_index ) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); - (void) cvbr; - /* do nothing */ -} - - -static boolean -lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - struct setup_context *setup_ctx = cvbr->setup; - - llvmpipe_setup_prepare( setup_ctx ); - - cvbr->llvmpipe->reduced_prim = 
u_reduced_prim(prim); - cvbr->prim = prim; - return TRUE; - -} - - -static INLINE cptrf4 get_vert( const void *vertex_buffer, - int index, - int stride ) -{ - return (cptrf4)((char *)vertex_buffer + index * stride); -} - - -/** - * draw elements / indexed primitives - */ -static void -lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; - const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); - const void *vertex_buffer = cvbr->vertex_buffer; - struct setup_context *setup_ctx = cvbr->setup; - unsigned i; - - switch (cvbr->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < nr; i++) { - llvmpipe_setup_point( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 1; i < nr; i += 2) { - llvmpipe_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - if (nr) { - llvmpipe_setup_line( setup_ctx, - get_vert(vertex_buffer, indices[nr-1], stride), - get_vert(vertex_buffer, indices[0], stride) ); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-2], stride) ); - } - } - else { - for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, - 
get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i+(i&1)-1], stride), - get_vert(vertex_buffer, indices[i-(i&1)], stride), - get_vert(vertex_buffer, indices[i-2], stride) ); - } - } - else { - for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i+(i&1)-2], stride), - get_vert(vertex_buffer, indices[i-(i&1)-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[0], stride), - get_vert(vertex_buffer, indices[i-1], stride) ); - } - } - else { - for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_QUADS: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - - llvmpipe_setup_tri( 
setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride)); - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } - } - break; - - case PIPE_PRIM_POLYGON: - /* Almost same as tri fan but the _first_ vertex specifies the flat - * shading color. Note that the first polygon vertex is passed as - * the last triangle vertex here. - * flatshade_first state makes no difference. - */ - for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[0], stride) ); - } - break; - - default: - assert(0); - } -} - - -/** - * This function is hit when the draw module is working in pass-through mode. - * It's up to us to convert the vertex array into point/line/tri prims. 
- */ -static void -lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; - struct setup_context *setup_ctx = cvbr->setup; - const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); - const void *vertex_buffer = - (void *) get_vert(cvbr->vertex_buffer, start, stride); - unsigned i; - - switch (cvbr->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < nr; i++) { - llvmpipe_setup_point( setup_ctx, - get_vert(vertex_buffer, i-0, stride) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 1; i < nr; i += 2) { - llvmpipe_setup_line( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - if (nr) { - llvmpipe_setup_line( setup_ctx, - get_vert(vertex_buffer, nr-1, stride), - get_vert(vertex_buffer, 0, stride) ); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-2, stride) ); - } - } - else { - for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i++) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i+(i&1)-1, stride), - get_vert(vertex_buffer, 
i-(i&1), stride), - get_vert(vertex_buffer, i-2, stride) ); - } - } - else { - for (i = 2; i < nr; i++) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i+(i&1)-2, stride), - get_vert(vertex_buffer, i-(i&1)-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, 0, stride), - get_vert(vertex_buffer, i-1, stride) ); - } - } - else { - for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, 0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_QUADS: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - if (llvmpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else 
{ - for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-0, stride) ); - } - } - break; - - case PIPE_PRIM_POLYGON: - /* Almost same as tri fan but the _first_ vertex specifies the flat - * shading color. Note that the first polygon vertex is passed as - * the last triangle vertex here. - * flatshade_first state makes no difference. - */ - for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, 0, stride) ); - } - break; - - default: - assert(0); - } -} - - - -static void -lp_vbuf_destroy(struct vbuf_render *vbr) -{ - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - llvmpipe_setup_destroy_context(cvbr->setup); - FREE(cvbr); -} - - -/** - * Create the post-transform vertex handler for the given context. 
- */ -struct vbuf_render * -lp_create_vbuf_backend(struct llvmpipe_context *lp) -{ - struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); - - assert(lp->draw); - - - cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; - cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - - cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info; - cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices; - cvbr->base.map_vertices = lp_vbuf_map_vertices; - cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices; - cvbr->base.set_primitive = lp_vbuf_set_primitive; - cvbr->base.draw = lp_vbuf_draw; - cvbr->base.draw_arrays = lp_vbuf_draw_arrays; - cvbr->base.release_vertices = lp_vbuf_release_vertices; - cvbr->base.destroy = lp_vbuf_destroy; - - cvbr->llvmpipe = lp; - - cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); - - return &cvbr->base; -} diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h deleted file mode 100644 index 7eb05de77a1..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ /dev/null @@ -1,114 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <[email protected]> - */ - -#ifndef LP_QUAD_H -#define LP_QUAD_H - -#include "pipe/p_state.h" -#include "tgsi/tgsi_exec.h" - - -#define QUAD_PRIM_POINT 1 -#define QUAD_PRIM_LINE 2 -#define QUAD_PRIM_TRI 3 - - -/* The rasterizer generates 2x2 quads of fragment and feeds them to - * the current fp_machine (see below). - * Remember that Y=0=top with Y increasing down the window. - */ -#define QUAD_TOP_LEFT 0 -#define QUAD_TOP_RIGHT 1 -#define QUAD_BOTTOM_LEFT 2 -#define QUAD_BOTTOM_RIGHT 3 - -#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) -#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) -#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) -#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) -#define MASK_ALL 0xf - - -/** - * Quad stage inputs (pos, coverage, front/back face, etc) - */ -struct quad_header_input -{ - int x0, y0; /**< quad window pos, always even */ - float coverage[QUAD_SIZE]; /**< fragment coverage for antialiasing */ - unsigned facing:1; /**< Front (0) or back (1) facing? */ - unsigned prim:2; /**< QUAD_PRIM_POINT, LINE, TRI */ -}; - - -/** - * Quad stage inputs/outputs. - */ -struct quad_header_inout -{ - unsigned mask:4; -}; - - -/** - * Quad stage outputs (color & depth). 
- */ -struct quad_header_output -{ - /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; -}; - - -/** - * Input interpolation coefficients - */ -struct quad_interp_coef -{ - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; -}; - - -/** - * Encodes everything we need to know about a 2x2 pixel block. Uses - * "Channel-Serial" or "SoA" layout. - */ -struct quad_header { - struct quad_header_input input; - struct quad_header_inout inout; - - /* Redundant/duplicated: - */ - const struct quad_interp_coef *coef; -}; - -#endif /* LP_QUAD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c new file mode 100644 index 00000000000..5ae323fd96c --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -0,0 +1,1036 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <limits.h> +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" +#include "util/u_surface.h" + +#include "lp_scene_queue.h" +#include "lp_debug.h" +#include "lp_fence.h" +#include "lp_perf.h" +#include "lp_rast.h" +#include "lp_rast_priv.h" +#include "lp_tile_soa.h" +#include "gallivm/lp_bld_debug.h" +#include "lp_scene.h" + + +/** + * Begin the rasterization phase. + * Map the framebuffer surfaces. Initialize the 'rast' state. + */ +static boolean +lp_rast_begin( struct lp_rasterizer *rast, + const struct pipe_framebuffer_state *fb, + boolean write_color, + boolean write_zstencil ) +{ + struct pipe_screen *screen = rast->screen; + struct pipe_surface *cbuf, *zsbuf; + int i; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + util_copy_framebuffer_state(&rast->state.fb, fb); + + rast->state.write_zstencil = write_zstencil; + rast->state.write_color = write_color; + + rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || + fb->height % TILE_SIZE != 0); + + + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + cbuf = rast->state.fb.cbufs[i]; + if (cbuf) { + rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen, + cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + cbuf->width, + cbuf->height); + if (!rast->cbuf_transfer[i]) + goto fail; + + rast->cbuf_map[i] = screen->transfer_map(rast->screen, + rast->cbuf_transfer[i]); + if (!rast->cbuf_map[i]) + goto fail; + } + } + + zsbuf = rast->state.fb.zsbuf; + if (zsbuf) { + rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, + zsbuf->texture, + zsbuf->face, + 
zsbuf->level, + zsbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + zsbuf->width, + zsbuf->height); + if (!rast->zsbuf_transfer) + goto fail; + + rast->zsbuf_map = screen->transfer_map(rast->screen, + rast->zsbuf_transfer); + if (!rast->zsbuf_map) + goto fail; + } + + return TRUE; + +fail: + /* Unmap and release transfers? + */ + return FALSE; +} + + +/** + * Finish the rasterization phase. + * Unmap framebuffer surfaces. + */ +static void +lp_rast_end( struct lp_rasterizer *rast ) +{ + struct pipe_screen *screen = rast->screen; + unsigned i; + + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + if (rast->cbuf_map[i]) + screen->transfer_unmap(screen, rast->cbuf_transfer[i]); + + if (rast->cbuf_transfer[i]) + screen->tex_transfer_destroy(rast->cbuf_transfer[i]); + + rast->cbuf_transfer[i] = NULL; + rast->cbuf_map[i] = NULL; + } + + if (rast->zsbuf_map) + screen->transfer_unmap(screen, rast->zsbuf_transfer); + + if (rast->zsbuf_transfer) + screen->tex_transfer_destroy(rast->zsbuf_transfer); + + rast->zsbuf_transfer = NULL; + rast->zsbuf_map = NULL; +} + + +/** + * Begining rasterization of a tile. + * \param x window X position of the tile, in pixels + * \param y window Y position of the tile, in pixels + */ +static void +lp_rast_start_tile( struct lp_rasterizer *rast, + unsigned thread_index, + unsigned x, unsigned y ) +{ + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); + + rast->tasks[thread_index].x = x; + rast->tasks[thread_index].y = y; +} + + +/** + * Clear the rasterizer's current color tile. + * This is a bin command called during bin processing. 
+ */ +void lp_rast_clear_color( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + const uint8_t *clear_color = arg.clear_color; + uint8_t **color_tile = rast->tasks[thread_index].tile.color; + unsigned i; + + LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, + clear_color[0], + clear_color[1], + clear_color[2], + clear_color[3]); + + if (clear_color[0] == clear_color[1] && + clear_color[1] == clear_color[2] && + clear_color[2] == clear_color[3]) { + /* clear to grayscale value {x, x, x, x} */ + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); + } + } + else { + /* Non-gray color. + * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code + * will need to change. It'll be pretty obvious when clearing no longer + * works. + */ + const unsigned chunk = TILE_SIZE / 4; + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + uint8_t *c = color_tile[i]; + unsigned j; + for (j = 0; j < 4 * TILE_SIZE; j++) { + memset(c, clear_color[0], chunk); + c += chunk; + memset(c, clear_color[1], chunk); + c += chunk; + memset(c, clear_color[2], chunk); + c += chunk; + memset(c, clear_color[3], chunk); + c += chunk; + } + assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4); + } + } + + LP_COUNT(nr_color_tile_clear); +} + + +/** + * Clear the rasterizer's current z/stencil tile. + * This is a bin command called during bin processing. + */ +void lp_rast_clear_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg) +{ + unsigned i; + uint32_t *depth_tile = rast->tasks[thread_index].tile.depth; + + LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); + + for (i = 0; i < TILE_SIZE * TILE_SIZE; i++) + depth_tile[i] = arg.clear_zstencil; +} + + +/** + * Load tile color from the framebuffer surface. + * This is a bin command called during bin processing. 
+ */ +void lp_rast_load_color( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg) +{ + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; + unsigned i; + + LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); + + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + struct pipe_transfer *transfer = rast->cbuf_transfer[i]; + int w = TILE_SIZE; + int h = TILE_SIZE; + + if (x >= transfer->width) + continue; + + if (y >= transfer->height) + continue; + + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); + + lp_tile_read_4ub(transfer->texture->format, + task->tile.color[i], + rast->cbuf_map[i], + transfer->stride, + x, y, + w, h); + + LP_COUNT(nr_color_tile_load); + } +} + + +static void +lp_tile_read_z32(uint32_t *tile, + const uint8_t *map, + unsigned map_stride, + unsigned x0, unsigned y0, unsigned w, unsigned h) +{ + unsigned x, y; + const uint8_t *map_row = map + y0*map_stride; + for (y = 0; y < h; ++y) { + const uint32_t *map_pixel = (uint32_t *)(map_row + x0*4); + for (x = 0; x < w; ++x) { + *tile++ = *map_pixel++; + } + map_row += map_stride; + } +} + +/** + * Load tile z/stencil from the framebuffer surface. + * This is a bin command called during bin processing. 
+ */ +void lp_rast_load_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; + + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; + + LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + + assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); + lp_tile_read_z32(task->tile.depth, + rast->zsbuf_map, + rast->zsbuf_transfer->stride, + x, y, w, h); +} + + +void lp_rast_set_state( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + const struct lp_rast_state *state = arg.set_state; + + LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); + + /* just set the current state pointer for this rasterizer */ + rast->tasks[thread_index].current_state = state; +} + + + +/** + * Run the shader on all blocks in a tile. This is used when a tile is + * completely contained inside a triangle. + * This is a bin command called during bin processing. 
+ */ +void lp_rast_shade_tile( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const struct lp_rast_state *state = task->current_state; + struct lp_rast_tile *tile = &task->tile; + const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + const unsigned tile_x = task->x; + const unsigned tile_y = task->y; + unsigned x, y; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + /* render the whole 64x64 tile in 4x4 chunks */ + for (y = 0; y < TILE_SIZE; y += 4){ + for (x = 0; x < TILE_SIZE; x += 4) { + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + uint32_t *depth; + unsigned block_offset, i; + + /* offset of the 16x16 pixel block within the tile */ + block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16); + + /* color buffer */ + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + color[i] = tile->color[i] + 4 * block_offset; + + /* depth buffer */ + depth = tile->depth + block_offset; + + /* run shader */ + state->jit_function[0]( &state->jit_context, + tile_x + x, tile_y + y, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); + } + } +} + + +/** + * Compute shading for a 4x4 block of pixels. + * This is a bin command called during bin processing. 
+ */ +void lp_rast_shade_quads( struct lp_rasterizer *rast, + unsigned thread_index, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + int32_t c1, int32_t c2, int32_t c3) +{ + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const struct lp_rast_state *state = task->current_state; + struct lp_rast_tile *tile = &task->tile; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + void *depth; + unsigned i; + unsigned ix, iy; + int block_offset; + +#ifdef DEBUG + assert(state); + + /* Sanity checks */ + assert(x % TILE_VECTOR_WIDTH == 0); + assert(y % TILE_VECTOR_HEIGHT == 0); + + assert((x % 4) == 0); + assert((y % 4) == 0); +#endif + + ix = x % TILE_SIZE; + iy = y % TILE_SIZE; + + /* offset of the 16x16 pixel block within the tile */ + block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16); + + /* color buffer */ + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + color[i] = tile->color[i] + 4 * block_offset; + + /* depth buffer */ + depth = tile->depth + block_offset; + + + +#ifdef DEBUG + assert(lp_check_alignment(tile->depth, 16)); + assert(lp_check_alignment(tile->color[0], 16)); + assert(lp_check_alignment(state->jit_context.blend_color, 16)); + + assert(lp_check_alignment(inputs->step[0], 16)); + assert(lp_check_alignment(inputs->step[1], 16)); + assert(lp_check_alignment(inputs->step[2], 16)); +#endif + + /* run shader */ + state->jit_function[1]( &state->jit_context, + x, y, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + c1, c2, c3, + inputs->step[0], inputs->step[1], inputs->step[2]); +} + + +/** + * Set top row and left column of the tile's pixels to white. For debugging. 
+ */ +static void +outline_tile(uint8_t *tile) +{ + const uint8_t val = 0xff; + unsigned i; + + for (i = 0; i < TILE_SIZE; i++) { + TILE_PIXEL(tile, i, 0, 0) = val; + TILE_PIXEL(tile, i, 0, 1) = val; + TILE_PIXEL(tile, i, 0, 2) = val; + TILE_PIXEL(tile, i, 0, 3) = val; + + TILE_PIXEL(tile, 0, i, 0) = val; + TILE_PIXEL(tile, 0, i, 1) = val; + TILE_PIXEL(tile, 0, i, 2) = val; + TILE_PIXEL(tile, 0, i, 3) = val; + } +} + + +/** + * Draw grid of gray lines at 16-pixel intervals across the tile to + * show the sub-tile boundaries. For debugging. + */ +static void +outline_subtiles(uint8_t *tile) +{ + const uint8_t val = 0x80; + const unsigned step = 16; + unsigned i, j; + + for (i = 0; i < TILE_SIZE; i += step) { + for (j = 0; j < TILE_SIZE; j++) { + TILE_PIXEL(tile, i, j, 0) = val; + TILE_PIXEL(tile, i, j, 1) = val; + TILE_PIXEL(tile, i, j, 2) = val; + TILE_PIXEL(tile, i, j, 3) = val; + + TILE_PIXEL(tile, j, i, 0) = val; + TILE_PIXEL(tile, j, i, 1) = val; + TILE_PIXEL(tile, j, i, 2) = val; + TILE_PIXEL(tile, j, i, 3) = val; + } + } + + outline_tile(tile); +} + + + +/** + * Write the rasterizer's color tile to the framebuffer. 
+ */ +static void lp_rast_store_color( struct lp_rasterizer *rast, + unsigned thread_index) +{ + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; + unsigned i; + + for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + struct pipe_transfer *transfer = rast->cbuf_transfer[i]; + int w = TILE_SIZE; + int h = TILE_SIZE; + + if (x >= transfer->width) + continue; + + if (y >= transfer->height) + continue; + + LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, + thread_index, x, y, w, h); + + if (LP_DEBUG & DEBUG_SHOW_SUBTILES) + outline_subtiles(task->tile.color[i]); + else if (LP_DEBUG & DEBUG_SHOW_TILES) + outline_tile(task->tile.color[i]); + + lp_tile_write_4ub(transfer->texture->format, + task->tile.color[i], + rast->cbuf_map[i], + transfer->stride, + x, y, + w, h); + + LP_COUNT(nr_color_tile_store); + } +} + + +static void +lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, + unsigned x0, unsigned y0, unsigned w, unsigned h) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < h; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*4); + for (x = 0; x < w; ++x) { + *dst_pixel++ = *src++; + } + dst_row += dst_stride; + } +} + +/** + * Write the rasterizer's z/stencil tile to the framebuffer. 
+ */ +static void lp_rast_store_zstencil( struct lp_rasterizer *rast, + unsigned thread_index ) +{ + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; + + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; + + LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + + assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); + lp_tile_write_z32(task->tile.depth, + rast->zsbuf_map, + rast->zsbuf_transfer->stride, + x, y, w, h); +} + + +/** + * Write the rasterizer's tiles to the framebuffer. + */ +static void +lp_rast_end_tile( struct lp_rasterizer *rast, + unsigned thread_index ) +{ + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + if (rast->state.write_color) + lp_rast_store_color(rast, thread_index); + + if (rast->state.write_zstencil) + lp_rast_store_zstencil(rast, thread_index); +} + + +/** + * Signal on a fence. This is called during bin execution/rasterization. + * Called per thread. + */ +void lp_rast_fence( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + struct lp_fence *fence = arg.fence; + + pipe_mutex_lock( fence->mutex ); + + fence->count++; + assert(fence->count <= fence->rank); + + LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__, + fence->count, fence->rank); + + pipe_condvar_signal( fence->signalled ); + + pipe_mutex_unlock( fence->mutex ); +} + + +/** + * When all the threads are done rasterizing a scene, one thread will + * call this function to reset the scene and put it onto the empty queue. 
+ */ +static void +release_scene( struct lp_rasterizer *rast, + struct lp_scene *scene ) +{ + util_unreference_framebuffer_state( &scene->fb ); + + lp_scene_reset( scene ); + lp_scene_enqueue( rast->empty_scenes, scene ); + rast->curr_scene = NULL; +} + + +/** + * Rasterize commands for a single bin. + * \param x, y position of the bin's tile in the framebuffer + * Must be called between lp_rast_begin() and lp_rast_end(). + * Called per thread. + */ +static void +rasterize_bin( struct lp_rasterizer *rast, + unsigned thread_index, + const struct cmd_bin *bin, + int x, int y) +{ + const struct cmd_block_list *commands = &bin->commands; + struct cmd_block *block; + unsigned k; + + lp_rast_start_tile( rast, thread_index, x, y ); + + /* simply execute each of the commands in the block list */ + for (block = commands->head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + block->cmd[k]( rast, thread_index, block->arg[k] ); + } + } + + lp_rast_end_tile( rast, thread_index ); +} + + +#define RAST(x) { lp_rast_##x, #x } + +static struct { + lp_rast_cmd cmd; + const char *name; +} cmd_names[] = +{ + RAST(load_color), + RAST(load_zstencil), + RAST(clear_color), + RAST(clear_zstencil), + RAST(triangle), + RAST(shade_tile), + RAST(set_state), + RAST(fence), +}; + +static void +debug_bin( const struct cmd_bin *bin ) +{ + const struct cmd_block *head = bin->commands.head; + int i, j; + + for (i = 0; i < head->count; i++) { + debug_printf("%d: ", i); + for (j = 0; j < Elements(cmd_names); j++) { + if (head->cmd[i] == cmd_names[j].cmd) { + debug_printf("%s\n", cmd_names[j].name); + break; + } + } + if (j == Elements(cmd_names)) + debug_printf("...other\n"); + } + +} + +/* An empty bin is one that just loads the contents of the tile and + * stores them again unchanged. This typically happens when bins have + * been flushed for some reason in the middle of a frame, or when + * incremental updates are being made to a render target. 
+ * + * Try to avoid doing pointless work in this case. + */ +static boolean +is_empty_bin( const struct cmd_bin *bin ) +{ + const struct cmd_block *head = bin->commands.head; + int i; + + if (0) + debug_bin(bin); + + /* We emit at most two load-tile commands at the start of the first + * command block. In addition we seem to emit a couple of + * set-state commands even in empty bins. + * + * As a heuristic, if a bin has more than 4 commands, consider it + * non-empty. + */ + if (head->next != NULL || + head->count > 4) { + return FALSE; + } + + for (i = 0; i < head->count; i++) + if (head->cmd[i] != lp_rast_load_color && + head->cmd[i] != lp_rast_load_zstencil && + head->cmd[i] != lp_rast_set_state) { + return FALSE; + } + + return TRUE; +} + + + +/** + * Rasterize/execute all bins within a scene. + * Called per thread. + */ +static void +rasterize_scene( struct lp_rasterizer *rast, + unsigned thread_index, + struct lp_scene *scene, + bool write_depth ) +{ + /* loop over scene bins, rasterize each */ +#if 0 + { + unsigned i, j; + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(scene, i, j); + rasterize_bin( rast, thread_index, + bin, i * TILE_SIZE, j * TILE_SIZE ); + } + } + } +#else + { + struct cmd_bin *bin; + int x, y; + + assert(scene); + while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { + if (!is_empty_bin( bin )) + rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + } + } +#endif +} + + +/** + * Called by setup module when it has something for us to render. 
+ */ +void +lp_rasterize_scene( struct lp_rasterizer *rast, + struct lp_scene *scene, + const struct pipe_framebuffer_state *fb, + bool write_depth ) +{ + boolean debug = false; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + if (debug) { + unsigned x, y; + debug_printf("rasterize scene:\n"); + debug_printf(" data size: %u\n", lp_scene_data_size(scene)); + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + debug_printf(" bin %u, %u size: %u\n", x, y, + lp_scene_bin_size(scene, x, y)); + } + } + } + + /* save framebuffer state in the bin */ + util_copy_framebuffer_state(&scene->fb, fb); + scene->write_depth = write_depth; + + if (rast->num_threads == 0) { + /* no threading */ + + lp_rast_begin( rast, fb, + fb->nr_cbufs != 0, /* always write color if cbufs present */ + fb->zsbuf != NULL && write_depth ); + + lp_scene_bin_iter_begin( scene ); + rasterize_scene( rast, 0, scene, write_depth ); + + release_scene( rast, scene ); + + lp_rast_end( rast ); + } + else { + /* threaded rendering! */ + unsigned i; + + lp_scene_enqueue( rast->full_scenes, scene ); + + /* signal the threads that there's work to do */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_signal(&rast->tasks[i].work_ready); + } + + /* wait for work to complete */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_wait(&rast->tasks[i].work_done); + } + } + + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); +} + + +/** + * This is the thread's main entrypoint. + * It's a simple loop: + * 1. wait for work + * 2. do work + * 3. 
signal that we're done + */ +static PIPE_THREAD_ROUTINE( thread_func, init_data ) +{ + struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; + struct lp_rasterizer *rast = task->rast; + boolean debug = false; + + while (1) { + /* wait for work */ + if (debug) + debug_printf("thread %d waiting for work\n", task->thread_index); + pipe_semaphore_wait(&task->work_ready); + + if (task->thread_index == 0) { + /* thread[0]: + * - get next scene to rasterize + * - map the framebuffer surfaces + */ + const struct pipe_framebuffer_state *fb; + boolean write_depth; + + rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE ); + + lp_scene_bin_iter_begin( rast->curr_scene ); + + fb = &rast->curr_scene->fb; + write_depth = rast->curr_scene->write_depth; + + lp_rast_begin( rast, fb, + fb->nr_cbufs != 0, + fb->zsbuf != NULL && write_depth ); + } + + /* Wait for all threads to get here so that threads[1+] don't + * get a null rast->curr_scene pointer. + */ + pipe_barrier_wait( &rast->barrier ); + + /* do work */ + if (debug) + debug_printf("thread %d doing work\n", task->thread_index); + rasterize_scene(rast, + task->thread_index, + rast->curr_scene, + rast->curr_scene->write_depth); + + /* wait for all threads to finish with this scene */ + pipe_barrier_wait( &rast->barrier ); + + if (task->thread_index == 0) { + /* thread[0]: + * - release the scene object + * - unmap the framebuffer surfaces + */ + release_scene( rast, rast->curr_scene ); + lp_rast_end( rast ); + } + + /* signal done with work */ + if (debug) + debug_printf("thread %d done working\n", task->thread_index); + pipe_semaphore_signal(&task->work_done); + } + + return NULL; +} + + +/** + * Initialize semaphores and spawn the threads. + */ +static void +create_rast_threads(struct lp_rasterizer *rast) +{ + unsigned i; + +#ifdef PIPE_OS_WINDOWS + /* Multithreading not supported on windows until conditions and barriers are + * properly implemented. 
*/ + rast->num_threads = 0; +#else + rast->num_threads = util_cpu_caps.nr_cpus; + rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads); + rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); +#endif + + /* NOTE: if num_threads is zero, we won't use any threads */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_init(&rast->tasks[i].work_ready, 0); + pipe_semaphore_init(&rast->tasks[i].work_done, 0); + rast->threads[i] = pipe_thread_create(thread_func, + (void *) &rast->tasks[i]); + } +} + + + +/** + * Create new lp_rasterizer. + * \param empty the queue to put empty scenes on after we've finished + * processing them. + */ +struct lp_rasterizer * +lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) +{ + struct lp_rasterizer *rast; + unsigned i, cbuf; + + rast = CALLOC_STRUCT(lp_rasterizer); + if(!rast) + return NULL; + + rast->screen = screen; + + rast->empty_scenes = empty; + rast->full_scenes = lp_scene_queue_create(); + + for (i = 0; i < Elements(rast->tasks); i++) { + struct lp_rasterizer_task *task = &rast->tasks[i]; + + for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) + task->tile.color[cbuf] = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); + + task->tile.depth = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); + task->rast = rast; + task->thread_index = i; + } + + create_rast_threads(rast); + + /* for synchronizing rasterization threads */ + pipe_barrier_init( &rast->barrier, rast->num_threads ); + + return rast; +} + + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer *rast ) +{ + unsigned i, cbuf; + + util_unreference_framebuffer_state(&rast->state.fb); + + for (i = 0; i < Elements(rast->tasks); i++) { + align_free(rast->tasks[i].tile.depth); + for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) + align_free(rast->tasks[i].tile.color[cbuf]); + } + + /* for synchronizing rasterization threads */ + pipe_barrier_destroy( &rast->barrier ); + + FREE(rast); +} + + +/** Return number of 
rasterization threads */ +unsigned +lp_rast_get_num_threads( struct lp_rasterizer *rast ) +{ + return rast->num_threads; +} diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h new file mode 100644 index 00000000000..34da73eb50e --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -0,0 +1,236 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * The rast code is concerned with rasterization of command bins. + * Each screen tile has a bin associated with it. To render the + * scene we iterate over the tile bins and execute the commands + * in each bin. + * We'll do that with multiple threads... 
+ */ + + +#ifndef LP_RAST_H +#define LP_RAST_H + +#include "pipe/p_compiler.h" +#include "lp_jit.h" + + +struct lp_rasterizer; +struct lp_scene; +struct lp_scene_queue; +struct lp_fence; +struct cmd_bin; +struct pipe_screen; + +/** For sub-pixel positioning */ +#define FIXED_ORDER 4 +#define FIXED_ONE (1<<FIXED_ORDER) + + +/** + * Rasterization state. + * Objects of this type are put into the shared data bin and pointed + * to by commands in the per-tile bins. + */ +struct lp_rast_state { + /* State for the shader. This also contains state which feeds into + * the fragment shader, such as blend color and alpha ref value. + */ + struct lp_jit_context jit_context; + + /* The shader itself. Probably we also need to pass a pointer to + * the tile color/z/stencil data somehow: + * jit_function[0] skips the triangle in/out test code + * jit_function[1] does triangle in/out testing + */ + lp_jit_frag_func jit_function[2]; + + boolean opaque; +}; + + +/** + * Coefficients necessary to run the shader at a given location. + * First coefficient is position. + * These pointers point into the bin data buffer. + */ +struct lp_rast_shader_inputs { + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; + + /* edge/step info for 3 edges and 4x4 block of pixels */ + PIPE_ALIGN_VAR(16) int step[3][16]; +}; + + +/** + * Rasterization information for a triangle known to be in this bin, + * plus inputs to run the shader: + * These fields are tile- and bin-independent. + * Objects of this type are put into the setup_context::data buffer. + */ +struct lp_rast_triangle { + /* one-pixel sized trivial accept offsets for each plane */ + int ei1; + int ei2; + int ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + int eo1; + int eo2; + int eo3; + + /* y deltas for vertex pairs (in fixed pt) */ + int dy12; + int dy23; + int dy31; + + /* x deltas for vertex pairs (in fixed pt) */ + int dx12; + int dx23; + int dx31; + + /* edge function values at minx,miny ?? 
*/ + int c1, c2, c3; + + /* inputs for the shader */ + PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs; +}; + + + +struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, + struct lp_scene_queue *empty ); + +void lp_rast_destroy( struct lp_rasterizer * ); + +unsigned lp_rast_get_num_threads( struct lp_rasterizer * ); + +void lp_rasterize_scene( struct lp_rasterizer *rast, + struct lp_scene *scene, + const struct pipe_framebuffer_state *fb, + bool write_depth ); + + + +union lp_rast_cmd_arg { + const struct lp_rast_shader_inputs *shade_tile; + const struct lp_rast_triangle *triangle; + const struct lp_rast_state *set_state; + uint8_t clear_color[4]; + unsigned clear_zstencil; + struct lp_fence *fence; +}; + + +/* Cast wrappers. Hopefully these compile to noops! + */ +static INLINE union lp_rast_cmd_arg +lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile ) +{ + union lp_rast_cmd_arg arg; + arg.shade_tile = shade_tile; + return arg; +} + +static INLINE union lp_rast_cmd_arg +lp_rast_arg_triangle( const struct lp_rast_triangle *triangle ) +{ + union lp_rast_cmd_arg arg; + arg.triangle = triangle; + return arg; +} + +static INLINE union lp_rast_cmd_arg +lp_rast_arg_state( const struct lp_rast_state *state ) +{ + union lp_rast_cmd_arg arg; + arg.set_state = state; + return arg; +} + +static INLINE union lp_rast_cmd_arg +lp_rast_arg_fence( struct lp_fence *fence ) +{ + union lp_rast_cmd_arg arg; + arg.fence = fence; + return arg; +} + + +static INLINE union lp_rast_cmd_arg +lp_rast_arg_null( void ) +{ + union lp_rast_cmd_arg arg; + arg.set_state = NULL; + return arg; +} + + + +/** + * Binnable Commands. + * These get put into bins by the setup code and are called when + * the bins are executed. 
+ */ + +void lp_rast_clear_color( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +void lp_rast_clear_zstencil( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +void lp_rast_load_color( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +void lp_rast_load_zstencil( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +void lp_rast_set_state( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +void lp_rast_shade_tile( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +void lp_rast_fence( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h new file mode 100644 index 00000000000..71e3a301e61 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -0,0 +1,172 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_RAST_PRIV_H +#define LP_RAST_PRIV_H + +#include "os/os_thread.h" +#include "lp_rast.h" +#include "lp_tile_soa.h" + + +#define MAX_THREADS 8 /* XXX probably temporary here */ + + +struct pipe_transfer; +struct pipe_screen; +struct lp_rasterizer; + + +/** + * A tile's color and depth memory. + * We can choose whatever layout for the internal tile storage we prefer. + */ +struct lp_rast_tile +{ + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + + uint32_t *depth; +}; + + +/** + * Per-thread rasterization state + */ +struct lp_rasterizer_task +{ + struct lp_rast_tile tile; /**< Tile color/z/stencil memory */ + + unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ + + const struct lp_rast_state *current_state; + + /** "back" pointer */ + struct lp_rasterizer *rast; + + /** "my" index */ + unsigned thread_index; + + pipe_semaphore work_ready; + pipe_semaphore work_done; +}; + + +/** + * This is the state required while rasterizing tiles. + * Note that this contains per-thread information too. + * The tile size is TILE_SIZE x TILE_SIZE pixels. 
+ */ +struct lp_rasterizer +{ + boolean clipped_tile; + boolean check_for_clipped_tiles; + + /* Framebuffer stuff + */ + struct pipe_screen *screen; + struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; + struct pipe_transfer *zsbuf_transfer; + void *cbuf_map[PIPE_MAX_COLOR_BUFS]; + void *zsbuf_map; + + struct { + struct pipe_framebuffer_state fb; + boolean write_color; + boolean write_zstencil; + unsigned clear_color; + unsigned clear_depth; + char clear_stencil; + } state; + + /** The incoming queue of scenes ready to rasterize */ + struct lp_scene_queue *full_scenes; + /** The outgoing queue of processed scenes to return to the setup module */ + struct lp_scene_queue *empty_scenes; + + /** The scene currently being rasterized by the threads */ + struct lp_scene *curr_scene; + + /** A task object for each rasterization thread */ + struct lp_rasterizer_task tasks[MAX_THREADS]; + + unsigned num_threads; + pipe_thread threads[MAX_THREADS]; + + /** For synchronizing the rasterization threads */ + pipe_barrier barrier; +}; + + +void lp_rast_shade_quads( struct lp_rasterizer *rast, + unsigned thread_index, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + int32_t c1, int32_t c2, int32_t c3); + + +/** + * Shade all pixels in a 4x4 block. The fragment code omits the + * triangle in/out tests. 
+ * \param x, y location of 4x4 block in window coords + */ +static INLINE void +lp_rast_shade_quads_all( struct lp_rasterizer *rast, + unsigned thread_index, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y ) +{ + const struct lp_rast_state *state = rast->tasks[thread_index].current_state; + struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + void *depth; + unsigned block_offset, i; + + /* pixel offset of this 4x4 block within the tile: each (ix / 4) step skips the 16 pixels of one 4x4 block, each (iy / 4) step skips 16 * 16 pixels (presumably one row of 16 such blocks; confirm against lp_tile_soa.h) */ + block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16; + + /* color buffer */ + for (i = 0; i < rast->state.fb.nr_cbufs; i++) + color[i] = tile->color[i] + 4 * block_offset; + + /* depth buffer */ + depth = tile->depth + block_offset; + + /* run shader */ + state->jit_function[0]( &state->jit_context, + x, y, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); +} + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c new file mode 100644 index 00000000000..3f76f159df1 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -0,0 +1,251 @@ +/************************************************************************** + * + * Copyright 2007-2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Rasterization for binned triangles within a tile + */ + +#include <limits.h> +#include "util/u_math.h" +#include "lp_debug.h" +#include "lp_perf.h" +#include "lp_rast_priv.h" +#include "lp_tile_soa.h" + + +/** + * Map an index in [0,15] to an x,y position, multiplied by 4. + * This is used to get the position of each subtile in a 4x4 + * grid of edge step values. + * Note: we can use some bit twiddling to compute these values instead + * of using a look-up table, but there's no measurable performance + * difference. 
+ */ +static const int pos_table4[16][2] = { + { 0, 0 }, + { 4, 0 }, + { 0, 4 }, + { 4, 4 }, + { 8, 0 }, + { 12, 0 }, + { 8, 4 }, + { 12, 4 }, + { 0, 8 }, + { 4, 8 }, + { 0, 12 }, + { 4, 12 }, + { 8, 8 }, + { 12, 8 }, + { 8, 12 }, + { 12, 12 } +}; + + +static const int pos_table16[16][2] = { + { 0, 0 }, + { 16, 0 }, + { 0, 16 }, + { 16, 16 }, + { 32, 0 }, + { 48, 0 }, + { 32, 16 }, + { 48, 16 }, + { 0, 32 }, + { 16, 32 }, + { 0, 48 }, + { 16, 48 }, + { 32, 32 }, + { 48, 32 }, + { 32, 48 }, + { 48, 48 } +}; + + +/** + * Shade all pixels in a 4x4 block. + */ +static void +block_full_4( struct lp_rasterizer_task *rast_task, + const struct lp_rast_triangle *tri, + int x, int y ) +{ + lp_rast_shade_quads_all(rast_task->rast, + rast_task->thread_index, + &tri->inputs, + x, y); +} + + +/** + * Shade all pixels in a 16x16 block. + */ +static void +block_full_16( struct lp_rasterizer_task *rast_task, + const struct lp_rast_triangle *tri, + int x, int y ) +{ + unsigned ix, iy; + assert(x % 16 == 0); + assert(y % 16 == 0); + for (iy = 0; iy < 16; iy += 4) + for (ix = 0; ix < 16; ix += 4) + block_full_4(rast_task, tri, x + ix, y + iy); +} + + +/** + * Pass the 4x4 pixel block to the shader function. + * Determination of which of the 16 pixels lies inside the triangle + * will be done as part of the fragment shader. + */ +static void +do_block_4( struct lp_rasterizer_task *rast_task, + const struct lp_rast_triangle *tri, + int x, int y, + int c1, + int c2, + int c3 ) +{ + lp_rast_shade_quads(rast_task->rast, + rast_task->thread_index, + &tri->inputs, + x, y, + -c1, -c2, -c3); +} + + +/** + * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out + * of the triangle's bounds. 
+ */ +static void +do_block_16( struct lp_rasterizer_task *rast_task, + const struct lp_rast_triangle *tri, + int x, int y, + int c1, + int c2, + int c3 ) +{ + const int eo1 = tri->eo1 * 4; + const int eo2 = tri->eo2 * 4; + const int eo3 = tri->eo3 * 4; + const int *step0 = tri->inputs.step[0]; + const int *step1 = tri->inputs.step[1]; + const int *step2 = tri->inputs.step[2]; + int i; + + assert(x % 16 == 0); + assert(y % 16 == 0); + + for (i = 0; i < 16; i++) { + int cx1 = c1 + step0[i] * 4; + int cx2 = c2 + step1[i] * 4; + int cx3 = c3 + step2[i] * 4; + + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ + LP_COUNT(nr_empty_4); + } + else { + int px = x + pos_table4[i][0]; + int py = y + pos_table4[i][1]; + /* Don't bother testing if the 4x4 block is entirely in/out of + * the triangle. It's a little faster to do it in the jit code. + */ + LP_COUNT(nr_non_empty_4); + do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); + } + } +} + + +/** + * Scan the tile in chunks and figure out which pixels to rasterize + * for this triangle. + */ +void +lp_rast_triangle( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + struct lp_rasterizer_task *rast_task = &rast->tasks[thread_index]; + const struct lp_rast_triangle *tri = arg.triangle; + + int x = rast_task->x; + int y = rast_task->y; + unsigned i; + + int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; + int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; + int c3 = tri->c3 + tri->dx31 * y - tri->dy31 * x; + + int ei1 = tri->ei1 * 16; + int ei2 = tri->ei2 * 16; + int ei3 = tri->ei3 * 16; + + int eo1 = tri->eo1 * 16; + int eo2 = tri->eo2 * 16; + int eo3 = tri->eo3 * 16; + + LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); + + /* Walk over the tile to build a list of 4x4 pixel blocks which will + * be filled/shaded. We do this at two granularities: 16x16 blocks + * and then 4x4 blocks. 
+ */ + for (i = 0; i < 16; i++) { + int cx1 = c1 + (tri->inputs.step[0][i] * 16); + int cx2 = c2 + (tri->inputs.step[1][i] * 16); + int cx3 = c3 + (tri->inputs.step[2][i] * 16); + + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) { + /* the block is completely outside the triangle - nop */ + LP_COUNT(nr_empty_16); + } + else { + int px = x + pos_table16[i][0]; + int py = y + pos_table16[i][1]; + + if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) { + /* the block is completely inside the triangle */ + LP_COUNT(nr_fully_covered_16); + block_full_16(rast_task, tri, px, py); + } + else { + /* the block is partially in/out of the triangle */ + LP_COUNT(nr_partially_covered_16); + do_block_16(rast_task, tri, px, py, cx1, cx2, cx3); + } + } + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c new file mode 100644 index 00000000000..b7116297ece --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -0,0 +1,392 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_simple_list.h" +#include "lp_scene.h" + + +struct lp_scene * +lp_scene_create(void) +{ + struct lp_scene *scene = CALLOC_STRUCT(lp_scene); + if (scene) + lp_scene_init(scene); + return scene; +} + + +void +lp_scene_destroy(struct lp_scene *scene) +{ + lp_scene_reset(scene); + lp_scene_free_bin_data(scene); + FREE(scene); +} + + +void +lp_scene_init(struct lp_scene *scene) +{ + unsigned i, j; + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + } + + scene->data.head = + scene->data.tail = CALLOC_STRUCT(data_block); + + make_empty_list(&scene->textures); + + pipe_mutex_init(scene->mutex); +} + + +/** + * Check if the scene's bins are all empty. + * For debugging purposes. 
+ */ +boolean +lp_scene_is_empty(struct lp_scene *scene ) +{ + unsigned x, y; + + for (y = 0; y < TILES_Y; y++) { + for (x = 0; x < TILES_X; x++) { + const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + const struct cmd_block_list *list = &bin->commands; + if (list->head != list->tail || list->head->count > 0) { + return FALSE; + } + } + } + return TRUE; +} + + +void +lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + struct cmd_block_list *list = &bin->commands; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->count = 0; +} + + +/** + * Set scene to empty state. + */ +void +lp_scene_reset(struct lp_scene *scene ) +{ + unsigned i, j; + + /* Free all but the last command block of each active bin: + */ + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + lp_scene_bin_reset(scene, i, j); + } + } + + assert(lp_scene_is_empty(scene)); + + /* Free all but last binned data block: + */ + { + struct data_block_list *list = &scene->data; + struct data_block *block, *tmp; + + for (block = list->head; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + assert(list->tail->next == NULL); + list->head = list->tail; + list->head->used = 0; + } + + /* Release texture refs + */ + { + struct texture_ref *ref, *next, *ref_list = &scene->textures; + for (ref = ref_list->next; ref != ref_list; ref = next) { + next = next_elem(ref); + pipe_texture_reference(&ref->texture, NULL); + FREE(ref); + } + make_empty_list(ref_list); + } +} + + +/** + * Free all data associated with the given scene, but don't free(scene). 
+ */ +void +lp_scene_free_bin_data(struct lp_scene *scene) +{ + unsigned i, j; + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + /* lp_scene_reset() should have been already called */ + assert(bin->commands.head == bin->commands.tail); + FREE(bin->commands.head); + bin->commands.head = NULL; + bin->commands.tail = NULL; + } + + FREE(scene->data.head); + scene->data.head = NULL; + + pipe_mutex_destroy(scene->mutex); +} + + +void +lp_scene_set_framebuffer_size( struct lp_scene *scene, + unsigned width, unsigned height ) +{ + assert(lp_scene_is_empty(scene)); + + scene->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; + scene->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; +} + + +void +lp_bin_new_cmd_block( struct cmd_block_list *list ) +{ + struct cmd_block *block = MALLOC_STRUCT(cmd_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->count = 0; +} + + +void +lp_bin_new_data_block( struct data_block_list *list ) +{ + struct data_block *block = MALLOC_STRUCT(data_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->used = 0; +} + + +/** Return number of bytes used for all bin data within a scene */ +unsigned +lp_scene_data_size( const struct lp_scene *scene ) +{ + unsigned size = 0; + const struct data_block *block; + for (block = scene->data.head; block; block = block->next) { + size += block->used; + } + return size; +} + + +/** Return number of bytes used for a single bin */ +unsigned +lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) +{ + struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y); + const struct cmd_block *cmd; + unsigned size = 0; + for (cmd = bin->commands.head; cmd; cmd = cmd->next) { + size += (cmd->count * + (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg))); + } + return size; +} + + +/** + * Add a reference to a texture by the scene. 
+ */ +void +lp_scene_texture_reference( struct lp_scene *scene, + struct pipe_texture *texture ) +{ + struct texture_ref *ref = CALLOC_STRUCT(texture_ref); + if (ref) { + struct texture_ref *ref_list = &scene->textures; + pipe_texture_reference(&ref->texture, texture); + insert_at_tail(ref_list, ref); + } +} + + +/** + * Does this scene have a reference to the given texture? + */ +boolean +lp_scene_is_texture_referenced( const struct lp_scene *scene, + const struct pipe_texture *texture ) +{ + const struct texture_ref *ref_list = &scene->textures; + const struct texture_ref *ref; + foreach (ref, ref_list) { + if (ref->texture == texture) + return TRUE; + } + return FALSE; +} + + +/** + * Return last command in the bin + */ +static lp_rast_cmd +lp_get_last_command( const struct cmd_bin *bin ) +{ + const struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + if (i > 0) + return tail->cmd[i - 1]; + else + return NULL; +} + + +/** + * Replace the arg of the last command in the bin. + */ +static void +lp_replace_last_command_arg( struct cmd_bin *bin, + const union lp_rast_cmd_arg arg ) +{ + struct cmd_block *tail = bin->commands.tail; + const unsigned i = tail->count; + assert(i > 0); + tail->arg[i - 1] = arg; +} + + + +/** + * Put a state-change command into all bins. + * If we find that the last command in a bin was also a state-change + * command, we can simply replace that one with the new one. 
+ */ +void +lp_scene_bin_state_command( struct lp_scene *scene, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + lp_rast_cmd last_cmd = lp_get_last_command(bin); + if (last_cmd == cmd) { + lp_replace_last_command_arg(bin, arg); + } + else { + lp_scene_bin_command( scene, i, j, cmd, arg ); + } + } + } +} + + +/** advance curr_x,y to the next bin */ +static boolean +next_bin(struct lp_scene *scene) +{ + scene->curr_x++; + if (scene->curr_x >= scene->tiles_x) { + scene->curr_x = 0; + scene->curr_y++; + } + if (scene->curr_y >= scene->tiles_y) { + /* no more bins */ + return FALSE; + } + return TRUE; +} + + +void +lp_scene_bin_iter_begin( struct lp_scene *scene ) +{ + scene->curr_x = scene->curr_y = -1; +} + + +/** + * Return pointer to next bin to be rendered. + * The lp_scene::curr_x and ::curr_y fields will be advanced. + * Multiple rendering threads will call this function to get a chunk + * of work (a bin) to work on. 
+ */ +struct cmd_bin * +lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ) +{ + struct cmd_bin *bin = NULL; + + pipe_mutex_lock(scene->mutex); + + if (scene->curr_x < 0) { + /* first bin */ + scene->curr_x = 0; + scene->curr_y = 0; + } + else if (!next_bin(scene)) { + /* no more bins left */ + goto end; + } + + bin = lp_scene_get_bin(scene, scene->curr_x, scene->curr_y); + *bin_x = scene->curr_x; + *bin_y = scene->curr_y; + +end: + /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/ + pipe_mutex_unlock(scene->mutex); + return bin; +} diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h new file mode 100644 index 00000000000..fb478cc2eb5 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -0,0 +1,301 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Binner data structures and bin-related functions. + * Note: the "setup" code is concerned with building scenes while + * The "rast" code is concerned with consuming/executing scenes. + */ + +#ifndef LP_SCENE_H +#define LP_SCENE_H + +#include "os/os_thread.h" +#include "lp_tile_soa.h" +#include "lp_rast.h" + + +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. + */ +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 +#define TILES_X (MAXWIDTH / TILE_SIZE) +#define TILES_Y (MAXHEIGHT / TILE_SIZE) + + +#define CMD_BLOCK_MAX 128 +#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) + + + +/* switch to a non-pointer value for this: + */ +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); + +struct cmd_block { + lp_rast_cmd cmd[CMD_BLOCK_MAX]; + union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; + unsigned count; + struct cmd_block *next; +}; + +struct data_block { + ubyte data[DATA_BLOCK_SIZE]; + unsigned used; + struct data_block *next; +}; + +struct cmd_block_list { + struct cmd_block *head; + struct cmd_block *tail; +}; + +/** + * For each screen tile we have one of these bins. + */ +struct cmd_bin { + struct cmd_block_list commands; +}; + + +/** + * This stores bulk data which is shared by all bins within a scene. + * Examples include triangle data and state data. The commands in + * the per-tile bins will point to chunks of data in this structure. 
+ */ +struct data_block_list { + struct data_block *head; + struct data_block *tail; +}; + + +/** List of texture references */ +struct texture_ref { + struct pipe_texture *texture; + struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */ +}; + + +/** + * All bins and bin data are contained here. + * Per-bin data goes into the 'tile' bins. + * Shared data goes into the 'data' buffer. + * + * When there are multiple threads, will want to double-buffer between + * scenes: + */ +struct lp_scene { + struct cmd_bin tile[TILES_X][TILES_Y]; + struct data_block_list data; + + /** the framebuffer to render the scene into */ + struct pipe_framebuffer_state fb; + + /** list of textures referenced by the scene commands */ + struct texture_ref textures; + + boolean write_depth; + + /** + * Number of active tiles in each dimension. + * This is basically the framebuffer size divided by the tile size (set by lp_scene_set_framebuffer_size()) + */ + unsigned tiles_x, tiles_y; + + int curr_x, curr_y; /**< for iterating over bins */ + pipe_mutex mutex; +}; + + + +struct lp_scene *lp_scene_create(void); + +void lp_scene_destroy(struct lp_scene *scene); + + +void lp_scene_init(struct lp_scene *scene); + +boolean lp_scene_is_empty(struct lp_scene *scene ); + +void lp_scene_reset(struct lp_scene *scene ); + +void lp_scene_free_bin_data(struct lp_scene *scene); + +void lp_scene_set_framebuffer_size( struct lp_scene *scene, + unsigned width, unsigned height ); + +void lp_bin_new_data_block( struct data_block_list *list ); + +void lp_bin_new_cmd_block( struct cmd_block_list *list ); + +unsigned lp_scene_data_size( const struct lp_scene *scene ); + +unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ); + +void lp_scene_texture_reference( struct lp_scene *scene, + struct pipe_texture *texture ); + +boolean lp_scene_is_texture_referenced( const struct lp_scene *scene, + const struct pipe_texture *texture ); + + +/** + * Allocate space for a command/data in the scene's data buffer. 
+ * Grow the block list if needed. + */ +static INLINE void * +lp_scene_alloc( struct lp_scene *scene, unsigned size) +{ + struct data_block_list *list = &scene->data; + + if (list->tail->used + size > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + tail->used += size; + return data; + } +} + + +/** + * As above, but with specific alignment. + */ +static INLINE void * +lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, + unsigned alignment ) +{ + struct data_block_list *list = &scene->data; + + if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { + lp_bin_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + ubyte *data = tail->data + tail->used; + unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; + tail->used += offset + size; + return data + offset; + } +} + + +/* Put back data if we decide not to use it, eg. culled triangles. + */ +static INLINE void +lp_scene_putback_data( struct lp_scene *scene, unsigned size) +{ + struct data_block_list *list = &scene->data; + assert(list->tail->used >= size); + list->tail->used -= size; +} + + +/** Return pointer to a particular tile's bin. */ +static INLINE struct cmd_bin * +lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y) +{ + return &scene->tile[x][y]; +} + + +/** Remove all commands from a bin */ +void +lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y); + + +/* Add a command to bin[x][y]. 
+ */ +static INLINE void +lp_scene_bin_command( struct lp_scene *scene, + unsigned x, unsigned y, + lp_rast_cmd cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + struct cmd_block_list *list = &bin->commands; + + assert(x < scene->tiles_x); + assert(y < scene->tiles_y); + + if (list->tail->count == CMD_BLOCK_MAX) { + lp_bin_new_cmd_block( list ); + } + + { + struct cmd_block *tail = list->tail; + unsigned i = tail->count; + tail->cmd[i] = cmd; + tail->arg[i] = arg; + tail->count++; + } +} + + +/* Add a command to all active bins. + */ +static INLINE void +lp_scene_bin_everywhere( struct lp_scene *scene, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < scene->tiles_x; i++) + for (j = 0; j < scene->tiles_y; j++) + lp_scene_bin_command( scene, i, j, cmd, arg ); +} + + +void +lp_scene_bin_state_command( struct lp_scene *scene, + lp_rast_cmd cmd, + const union lp_rast_cmd_arg arg ); + + +static INLINE unsigned +lp_scene_get_num_bins( const struct lp_scene *scene ) +{ + return scene->tiles_x * scene->tiles_y; +} + + +void +lp_scene_bin_iter_begin( struct lp_scene *scene ); + +struct cmd_bin * +lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); + + +#endif /* LP_SCENE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.c b/src/gallium/drivers/llvmpipe/lp_scene_queue.c new file mode 100644 index 00000000000..43d74e4d89d --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.c @@ -0,0 +1,122 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Scene queue. We'll use two queues. One contains "full" scenes which + * are produced by the "setup" code. The other contains "empty" scenes + * which are produced by the "rast" code when it finishes rendering a scene. 
+ */ + +#include "util/u_ringbuffer.h" +#include "util/u_memory.h" +#include "lp_scene_queue.h" + + + +#define MAX_SCENE_QUEUE 4 + +struct scene_packet { + struct util_packet header; + struct lp_scene *scene; +}; + +/** + * A queue of scenes + */ +struct lp_scene_queue +{ + struct util_ringbuffer *ring; +}; + + + +/** Allocate a new scene queue */ +struct lp_scene_queue * +lp_scene_queue_create(void) +{ + struct lp_scene_queue *queue = CALLOC_STRUCT(lp_scene_queue); + if (queue == NULL) + return NULL; + + queue->ring = util_ringbuffer_create( MAX_SCENE_QUEUE * + sizeof( struct scene_packet ) / 4); + if (queue->ring == NULL) + goto fail; + + return queue; + +fail: + FREE(queue); + return NULL; +} + + +/** Delete a scene queue */ +void +lp_scene_queue_destroy(struct lp_scene_queue *queue) +{ + util_ringbuffer_destroy(queue->ring); + FREE(queue); +} + + +/** Remove first lp_scene from head of queue */ +struct lp_scene * +lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait) +{ + struct scene_packet packet; + enum pipe_error ret; + + ret = util_ringbuffer_dequeue(queue->ring, + &packet.header, + sizeof packet / 4, + wait ); + if (ret != PIPE_OK) + return NULL; + + return packet.scene; +} + + +/** Add an lp_scene to tail of queue */ +void +lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene) +{ + struct scene_packet packet; + + packet.header.dwords = sizeof packet / 4; + packet.header.data24 = 0; + packet.scene = scene; + + util_ringbuffer_enqueue(queue->ring, &packet.header); +} + + + + + diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/src/gallium/drivers/llvmpipe/lp_scene_queue.h new file mode 100644 index 00000000000..fd7c65a2c8b --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h @@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_SCENE_QUEUE +#define LP_SCENE_QUEUE + +struct lp_scene_queue; +struct lp_scene; + + +struct lp_scene_queue * +lp_scene_queue_create(void); + +void +lp_scene_queue_destroy(struct lp_scene_queue *queue); + +struct lp_scene * +lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait); + +void +lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene); + + + + +#endif /* LP_BIN_QUEUE */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 9b47415f003..1cd3ea9a840 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -33,9 +33,11 @@ #include "lp_texture.h" #include "lp_buffer.h" +#include "lp_fence.h" #include "lp_winsys.h" #include "lp_jit.h" #include "lp_screen.h" +#include "lp_context.h" #include "lp_debug.h" #ifdef DEBUG @@ -51,6 +53,10 @@ static const struct debug_named_value lp_debug_flags[] = { { "query", DEBUG_QUERY }, { "screen", DEBUG_SCREEN }, { "jit", DEBUG_JIT }, + { "show_tiles", DEBUG_SHOW_TILES }, + { "show_subtiles", DEBUG_SHOW_SUBTILES }, + { "counters", DEBUG_COUNTERS }, + { "nopt", DEBUG_NO_LLVM_OPT }, {NULL, 0} }; #endif @@ -110,6 +116,16 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 0; + case PIPE_CAP_INDEP_BLEND_FUNC: + return 0; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 0; default: return 0; } @@ -295,10 +311,12 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys) screen->base.is_format_supported = llvmpipe_is_format_supported; screen->base.surface_buffer_create = llvmpipe_surface_buffer_create; + screen->base.context_create = 
llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; llvmpipe_init_screen_texture_funcs(&screen->base); llvmpipe_init_screen_buffer_funcs(&screen->base); + llvmpipe_init_screen_fence_funcs(&screen->base); lp_jit_screen_init(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b18f17c0cd3..3186069899d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -26,1479 +26,704 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines, triangles) + * Tiling engine. * - * \author Keith Whitwell <[email protected]> - * \author Brian Paul + * Builds per-tile display lists and executes them on calls to + * lp_setup_flush(). */ -#include "lp_context.h" -#include "lp_quad.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" -#include "util/u_format.h" -#include "util/u_math.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" -#include "lp_bld_debug.h" -#include "lp_tile_cache.h" -#include "lp_tile_soa.h" - - -#define DEBUG_VERTS 0 -#define DEBUG_FRAGS 0 - -/** - * Triangle edge info - */ -struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ - float dxdy; /**< dx/dy */ - float sx, sy; /**< first sample point coord */ - int lines; /**< number of lines on this edge */ -}; - - -#define MAX_QUADS 16 - +#include "util/u_pack_color.h" +#include "util/u_surface.h" +#include "lp_scene.h" +#include "lp_scene_queue.h" +#include "lp_buffer.h" +#include "lp_texture.h" +#include "lp_debug.h" +#include "lp_fence.h" +#include "lp_rast.h" +#include "lp_setup_context.h" -/** - * Triangle setup info (derived from 
draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_context { - struct llvmpipe_context *llvmpipe; - - /* Vertices are just an array of floats making up each attribute in - * turn. Currently fixed at 4 floats, but should change in time. - * Codegen will help cope with this. - */ - const float (*vmax)[4]; - const float (*vmid)[4]; - const float (*vmin)[4]; - const float (*vprovoke)[4]; - - struct edge ebot; - struct edge etop; - struct edge emaj; - - float oneoverarea; - int facing; +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" - float pixel_offset; - struct quad_header quad[MAX_QUADS]; - struct quad_header *quad_ptrs[MAX_QUADS]; - unsigned count; +static void set_scene_state( struct setup_context *, unsigned ); - struct quad_interp_coef coef; - struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; - int y; - } span; +struct lp_scene * +lp_setup_get_current_scene(struct setup_context *setup) +{ + if (!setup->scene) { -#if DEBUG_FRAGS - uint numFragsEmitted; /**< per primitive */ - uint numFragsWritten; /**< per primitive */ -#endif + /* wait for a free/empty scene + */ + setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE); - unsigned winding; /* which winding to cull */ -}; + if(0)lp_scene_reset( setup->scene ); /* XXX temporary? */ + lp_scene_set_framebuffer_size(setup->scene, + setup->fb.width, + setup->fb.height); + } + return setup->scene; +} -/** - * Execute fragment shader for the four fragments in the quad. 
- */ -ALIGN_STACK static void -shade_quads(struct llvmpipe_context *llvmpipe, - struct quad_header *quads[], - unsigned nr) +first_triangle( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { - struct lp_fragment_shader *fs = llvmpipe->fs; - struct quad_header *quad = quads[0]; - const unsigned x = quad->input.x0; - const unsigned y = quad->input.y0; - uint8_t *tile; - uint8_t *color; - void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; - unsigned chan_index; - unsigned q; - - assert(fs->current); - if(!fs->current) - return; - - /* Sanity checks */ - assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH); - assert(x % TILE_VECTOR_WIDTH == 0); - assert(y % TILE_VECTOR_HEIGHT == 0); - for (q = 0; q < nr; ++q) { - assert(quads[q]->input.x0 == x + q*2); - assert(quads[q]->input.y0 == y); - } - - /* mask */ - for (q = 0; q < 4; ++q) - for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? 
~0 : 0; + set_scene_state( setup, SETUP_ACTIVE ); + lp_setup_choose_triangle( setup ); + setup->triangle( setup, v0, v1, v2 ); +} - /* color buffer */ - if(llvmpipe->framebuffer.nr_cbufs >= 1 && - llvmpipe->framebuffer.cbufs[0]) { - tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y); - color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0); - } - else - color = NULL; - - /* depth buffer */ - if(llvmpipe->zsbuf_map) { - assert((x % 2) == 0); - assert((y % 2) == 0); - depth = llvmpipe->zsbuf_map + - y*llvmpipe->zsbuf_transfer->stride + - 2*x*util_format_get_blocksize(llvmpipe->zsbuf_transfer->texture->format); - } - else - depth = NULL; - - /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ - assert(lp_check_alignment(mask, 16)); - - assert(lp_check_alignment(depth, 16)); - assert(lp_check_alignment(color, 16)); - assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); - - /* run shader */ - fs->current->jit_function( &llvmpipe->jit_context, - x, y, - quad->coef->a0, - quad->coef->dadx, - quad->coef->dady, - &mask[0][0], - color, - depth); +static void +first_line( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + set_scene_state( setup, SETUP_ACTIVE ); + lp_setup_choose_line( setup ); + setup->line( setup, v0, v1 ); } +static void +first_point( struct setup_context *setup, + const float (*v0)[4]) +{ + set_scene_state( setup, SETUP_ACTIVE ); + lp_setup_choose_point( setup ); + setup->point( setup, v0 ); +} +static void reset_context( struct setup_context *setup ) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + /* Reset derived state */ + setup->constants.stored_size = 0; + setup->constants.stored_data = NULL; + setup->fs.stored = NULL; + setup->dirty = ~0; -/** - * Do triangle cull test using tri determinant (sign indicates orientation) - * \return true if triangle is to be culled. 
- */ -static INLINE boolean -cull_tri(const struct setup_context *setup, float det) -{ - if (det != 0) { - /* if (det < 0 then Z points toward camera and triangle is - * counter-clockwise winding. - */ - unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; + /* no current bin */ + setup->scene = NULL; - if ((winding & setup->winding) == 0) - return FALSE; - } + /* Reset some state: + */ + setup->clear.flags = 0; - /* Culled: + /* Have an explicit "start-binning" call and get rid of this + * pointer twiddling? */ - return TRUE; + setup->line = first_line; + setup->point = first_point; + setup->triangle = first_triangle; } - -/** - * Clip setup->quad against the scissor/surface bounds. - */ -static INLINE void -quad_clip( struct setup_context *setup, struct quad_header *quad ) +/** Rasterize all scene's bins */ +static void +lp_setup_rasterize_scene( struct setup_context *setup, + boolean write_depth ) { - const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (quad->input.x0 >= maxx || - quad->input.y0 >= maxy || - quad->input.x0 + 1 < minx || - quad->input.y0 + 1 < miny) { - /* totally clipped */ - quad->inout.mask = 0x0; - return; - } - if (quad->input.x0 < minx) - quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (quad->input.y0 < miny) - quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (quad->input.x0 == maxx - 1) - quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (quad->input.y0 == maxy - 1) - quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} + struct lp_scene *scene = lp_setup_get_current_scene(setup); + lp_rasterize_scene(setup->rast, + scene, + &setup->fb, + write_depth); + reset_context( setup ); -/** - * Given an X or Y coordinate, return the block/quad coordinate that it - * belongs to. 
- */ -static INLINE int block( int x ) -{ - return x & ~(2-1); + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } -static INLINE int block_x( int x ) + + +static void +begin_binning( struct setup_context *setup ) { - return x & ~(TILE_VECTOR_WIDTH - 1); + struct lp_scene *scene = lp_setup_get_current_scene(setup); + + LP_DBG(DEBUG_SETUP, "%s color: %s depth: %s\n", __FUNCTION__, + (setup->clear.flags & PIPE_CLEAR_COLOR) ? "clear": "load", + (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) ? "clear": "load"); + + if (setup->fb.nr_cbufs) { + if (setup->clear.flags & PIPE_CLEAR_COLOR) + lp_scene_bin_everywhere( scene, + lp_rast_clear_color, + setup->clear.color ); + else + lp_scene_bin_everywhere( scene, + lp_rast_load_color, + lp_rast_arg_null() ); + } + + if (setup->fb.zsbuf) { + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) + lp_scene_bin_everywhere( scene, + lp_rast_clear_zstencil, + setup->clear.zstencil ); + else + lp_scene_bin_everywhere( scene, + lp_rast_load_zstencil, + lp_rast_arg_null() ); + } + + LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); } -/** - * Emit a quad (pass to next stage) with clipping. +/* This basically bins and then flushes any outstanding full-screen + * clears. + * + * TODO: fast path for fullscreen clears and no triangles. */ -static INLINE void -clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) +static void +execute_clears( struct setup_context *setup ) { - quad_clip( setup, quad ); - - if (quad->inout.mask) { - struct llvmpipe_context *lp = setup->llvmpipe; - -#if 1 - /* XXX: The blender expects 4 quads. This is far from efficient, but - * until we codegenerate single-quad variants of the fragment pipeline - * we need this hack. 
*/ - const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - struct quad_header quads[4]; - struct quad_header *quad_ptrs[4]; - int x0 = block_x(quad->input.x0); - unsigned i; - - assert(nr_quads == 4); - - for(i = 0; i < nr_quads; ++i) { - int x = x0 + 2*i; - if(x == quad->input.x0) - memcpy(&quads[i], quad, sizeof quads[i]); - else { - memset(&quads[i], 0, sizeof quads[i]); - quads[i].input.x0 = x; - quads[i].input.y0 = quad->input.y0; - quads[i].coef = quad->coef; - } - quad_ptrs[i] = &quads[i]; - } + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - shade_quads( lp, quad_ptrs, nr_quads ); -#else - shade_quads( lp, &quad, 1 ); -#endif - } + begin_binning( setup ); + lp_setup_rasterize_scene( setup, TRUE ); } -/** - * Render a horizontal span of quads - */ -static void flush_spans( struct setup_context *setup ) +static void +set_scene_state( struct setup_context *setup, + unsigned new_state ) { - const int step = TILE_VECTOR_WIDTH; - const int xleft0 = setup->span.left[0]; - const int xleft1 = setup->span.left[1]; - const int xright0 = setup->span.right[0]; - const int xright1 = setup->span.right[1]; - - - int minleft = block_x(MIN2(xleft0, xleft1)); - int maxright = MAX2(xright0, xright1); - int x; - - for (x = minleft; x < maxright; x += step) { - unsigned skip_left0 = CLAMP(xleft0 - x, 0, step); - unsigned skip_left1 = CLAMP(xleft1 - x, 0, step); - unsigned skip_right0 = CLAMP(x + step - xright0, 0, step); - unsigned skip_right1 = CLAMP(x + step - xright1, 0, step); - unsigned lx = x; - const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - unsigned q = 0; - - unsigned skipmask_left0 = (1U << skip_left0) - 1U; - unsigned skipmask_left1 = (1U << skip_left1) - 1U; - - /* These calculations fail when step == 32 and skip_right == 0. 
- */ - unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0); - unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1); - - unsigned mask0 = ~skipmask_left0 & ~skipmask_right0; - unsigned mask1 = ~skipmask_left1 & ~skipmask_right1; - - if (mask0 | mask1) { - for(q = 0; q < nr_quads; ++q) { - unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); - setup->quad[q].input.x0 = lx; - setup->quad[q].input.y0 = setup->span.y; - setup->quad[q].inout.mask = quadmask; - setup->quad_ptrs[q] = &setup->quad[q]; - mask0 >>= 2; - mask1 >>= 2; - lx += 2; - } - assert(!(mask0 | mask1)); + unsigned old_state = setup->state; - shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads ); + if (old_state == new_state) + return; + + LP_DBG(DEBUG_SETUP, "%s old %d new %d\n", __FUNCTION__, old_state, new_state); + + switch (new_state) { + case SETUP_ACTIVE: + begin_binning( setup ); + break; + + case SETUP_CLEARED: + if (old_state == SETUP_ACTIVE) { + assert(0); + return; } + break; + + case SETUP_FLUSHED: + if (old_state == SETUP_CLEARED) + execute_clears( setup ); + else + lp_setup_rasterize_scene( setup, TRUE ); + break; } - - setup->span.y = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - setup->span.left[0] = 1000000; /* greater than right[0] */ - setup->span.left[1] = 1000000; /* greater than right[1] */ + setup->state = new_state; } -#if DEBUG_VERTS -static void print_vertex(const struct setup_context *setup, - const float (*v)[4]) +void +lp_setup_flush( struct setup_context *setup, + unsigned flags ) { - int i; - debug_printf(" Vertex: (%p)\n", v); - for (i = 0; i < setup->quad[0].nr_attrs; i++) { - debug_printf(" %d: %f %f %f %f\n", i, - v[i][0], v[i][1], v[i][2], v[i][3]); - if (util_is_inf_or_nan(v[i][0])) { - debug_printf(" NaN!\n"); - } - } + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + set_scene_state( setup, SETUP_FLUSHED ); } -#endif -/** - * Sort the vertices from top to bottom order, setting up the triangle - * edge fields (ebot, emaj, 
etop). - * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise - */ -static boolean setup_sort_vertices( struct setup_context *setup, - float det, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - setup->vprovoke = v2; - - /* determine bottom to top order of vertices */ - { - float y0 = v0[0][1]; - float y1 = v1[0][1]; - float y2 = v2[0][1]; - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup->vmin = v0; - setup->vmid = v1; - setup->vmax = v2; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup->vmin = v2; - setup->vmid = v0; - setup->vmax = v1; - } - else { - /* y0<=y2<=y1 */ - setup->vmin = v0; - setup->vmid = v2; - setup->vmax = v1; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup->vmin = v1; - setup->vmid = v0; - setup->vmax = v2; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup->vmin = v2; - setup->vmid = v1; - setup->vmax = v0; - } - else { - /* y1<=y2<=y0 */ - setup->vmin = v1; - setup->vmid = v2; - setup->vmax = v0; - } - } - } - setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; - setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; - setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; - setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; - setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; - setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; - - /* - * Compute triangle's area. Use 1/area to compute partial - * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. 
- */ - { - const float area = (setup->emaj.dx * setup->ebot.dy - - setup->ebot.dx * setup->emaj.dy); - - setup->oneoverarea = 1.0f / area; - - /* - debug_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup->oneoverarea, area, det ); - */ - if (util_is_inf_or_nan(setup->oneoverarea)) - return FALSE; - } +void +lp_setup_bind_framebuffer( struct setup_context *setup, + const struct pipe_framebuffer_state *fb ) +{ + struct lp_scene *scene = lp_setup_get_current_scene(setup); - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup->facing = - ((det > 0.0) ^ - (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW)); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - /* Prepare pixel offset for rasterisation: - * - pixel center (0.5, 0.5) for GL, or - * - assume (0.0, 0.0) for other APIs. - */ - if (setup->llvmpipe->rasterizer->gl_rasterization_rules) { - setup->pixel_offset = 0.5f; - } else { - setup->pixel_offset = 0.0f; - } + set_scene_state( setup, SETUP_FLUSHED ); - return TRUE; -} + /* re-get scene pointer, may have a new scene after flushing */ + scene = lp_setup_get_current_scene(setup); + util_copy_framebuffer_state(&setup->fb, fb); -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. 
- */ -static void tri_pos_coeff( struct setup_context *setup, - uint vertSlot, unsigned i) -{ - float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; - float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - setup->coef.dadx[0][i] = dadx; - setup->coef.dady[0][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (pixel_offset, pixel_offset). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - setup->pixel_offset) + - dady * (setup->vmin[0][1] - setup->pixel_offset))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ + lp_scene_set_framebuffer_size(scene, setup->fb.width, setup->fb.height); } -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. 
- * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_pos_coeff( struct setup_context *setup, - uint vertSlot, unsigned i) +void +lp_setup_clear( struct setup_context *setup, + const float *color, + double depth, + unsigned stencil, + unsigned flags ) { - setup->coef.dadx[0][i] = 0; - setup->coef.dady[0][i] = 0; - - /* need provoking vertex info! - */ - setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i]; -} + struct lp_scene *scene = lp_setup_get_current_scene(setup); + unsigned i; + LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - setup->coef.dadx[1 + attrib][i] = 0; - setup->coef.dady[1 + attrib][i] = 0; - /* need provoking vertex info! - */ - setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i]; + if (flags & PIPE_CLEAR_COLOR) { + for (i = 0; i < 4; ++i) + setup->clear.color.clear_color[i] = float_to_ubyte(color[i]); } -} - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. 
- */ -static void tri_linear_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; - float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - setup->pixel_offset) + - dady * (setup->vmin[0][1] - setup->pixel_offset))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ + if (flags & PIPE_CLEAR_DEPTHSTENCIL) { + setup->clear.zstencil.clear_zstencil = + util_pack_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); } -} + if (setup->state == SETUP_ACTIVE) { + /* Add the clear to existing scene. 
In the unusual case where + * both color and depth-stencil are being cleared when there's + * already been some rendering, we could discard the currently + * binned scene and start again, but I don't see that as being + * a common usage. + */ + if (flags & PIPE_CLEAR_COLOR) + lp_scene_bin_everywhere( scene, + lp_rast_clear_color, + setup->clear.color ); -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void tri_persp_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - /* premultiply by 1/w (v[0][3] is always W): + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) + lp_scene_bin_everywhere( scene, + lp_rast_clear_zstencil, + setup->clear.zstencil ); + } + else { + /* Put ourselves into the 'pre-clear' state, specifically to try + * and accumulate multiple clears to color and depth_stencil + * buffers which the app or state-tracker might issue + * separately. 
*/ - float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; - float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; - float botda = mida - mina; - float majda = maxa - mina; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - /* - debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, - setup->vmin[vertSlot][i], - setup->vmid[vertSlot][i], - setup->vmax[vertSlot][i] - ); - */ - assert(i <= 3); - - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (mina - - (dadx * (setup->vmin[0][0] - setup->pixel_offset) + - dady * (setup->vmin[0][1] - setup->pixel_offset))); + set_scene_state( setup, SETUP_CLEARED ); + + setup->clear.flags |= flags; } } /** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. - * Z and W are copied from posCoef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + * Emit a fence. 
*/ -static void -setup_fragcoord_coeff(struct setup_context *setup, uint slot) +struct pipe_fence_handle * +lp_setup_fence( struct setup_context *setup ) { - /*X*/ - setup->coef.a0[1 + slot][0] = 0; - setup->coef.dadx[1 + slot][0] = 1.0; - setup->coef.dady[1 + slot][0] = 0.0; - /*Y*/ - setup->coef.a0[1 + slot][1] = 0.0; - setup->coef.dadx[1 + slot][1] = 0.0; - setup->coef.dady[1 + slot][1] = 1.0; - /*Z*/ - setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2]; - setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2]; - setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2]; - /*W*/ - setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3]; - setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3]; - setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3]; -} + struct lp_scene *scene = lp_setup_get_current_scene(setup); + const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ + struct lp_fence *fence = lp_fence_create(rank); + LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); + set_scene_state( setup, SETUP_ACTIVE ); -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. 
- */ -static void setup_tri_coefficients( struct setup_context *setup ) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; + /* insert the fence into all command bins */ + lp_scene_bin_everywhere( scene, + lp_rast_fence, + lp_rast_arg_fence(fence) ); - /* z and w are done by linear interpolation: - */ - tri_pos_coeff(setup, 0, 2); - tri_pos_coeff(setup, 0, 3); + return (struct pipe_fence_handle *) fence; +} - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_LINEAR: - tri_linear_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - tri_persp_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } +void +lp_setup_set_triangle_state( struct setup_context *setup, + unsigned cull_mode, + boolean ccw_is_frontface, + boolean scissor ) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } - } + setup->ccw_is_frontface = ccw_is_frontface; + setup->cullmode = cull_mode; + setup->triangle = first_triangle; + setup->scissor_test = scissor; } -static void setup_tri_edges( struct setup_context *setup ) +void +lp_setup_set_fs_inputs( struct setup_context *setup, + const struct lp_shader_input *input, + unsigned nr ) { - float vmin_x = setup->vmin[0][0] + setup->pixel_offset; - float vmid_x = setup->vmid[0][0] + setup->pixel_offset; - - float 
vmin_y = setup->vmin[0][1] - setup->pixel_offset; - float vmid_y = setup->vmid[0][1] - setup->pixel_offset; - float vmax_y = setup->vmax[0][1] - setup->pixel_offset; - - setup->emaj.sy = ceilf(vmin_y); - setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); - setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; - setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; - - setup->etop.sy = ceilf(vmid_y); - setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); - setup->etop.dxdy = setup->etop.dx / setup->etop.dy; - setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; - - setup->ebot.sy = ceilf(vmin_y); - setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); - setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; - setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; -} + LP_DBG(DEBUG_SETUP, "%s %p %u\n", __FUNCTION__, (void *) input, nr); + memcpy( setup->fs.input, input, nr * sizeof input[0] ); + setup->fs.nr_inputs = nr; +} -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. 
- */ -static void subtriangle( struct setup_context *setup, - struct edge *eleft, - struct edge *eright, - unsigned lines ) +void +lp_setup_set_fs_functions( struct setup_context *setup, + lp_jit_frag_func jit_function0, + lp_jit_frag_func jit_function1, + boolean opaque ) { - const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - assert((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - if (start_y < miny) - start_y = miny; - - finish_y = sy + lines; - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. 
- */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup->span.y) { - flush_spans(setup); - setup->span.y = block(_y); - } + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function0); + /* FIXME: reference count */ - setup->span.left[_y&1] = left; - setup->span.right[_y&1] = right; - } - } - - - /* save the values so that emaj can be restarted: - */ - eleft->sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; + setup->fs.current.jit_function[0] = jit_function0; + setup->fs.current.jit_function[1] = jit_function1; + setup->fs.current.opaque = opaque; + setup->dirty |= LP_SETUP_NEW_FS; } - -/** - * Recalculate prim's determinant. This is needed as we don't have - * get this information through the vbuf_render interface & we must - * calculate it here. - */ -static float -calc_det( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +void +lp_setup_set_fs_constants(struct setup_context *setup, + struct pipe_buffer *buffer) { - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - return ex * fy - ey * fx; + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffer); + + pipe_buffer_reference(&setup->constants.current, buffer); + + setup->dirty |= LP_SETUP_NEW_CONSTANTS; } -/** - * Do setup for triangle rasterization, then render the triangle. 
- */ -void llvmpipe_setup_tri( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +void +lp_setup_set_alpha_ref_value( struct setup_context *setup, + float alpha_ref_value ) { - float det; - -#if DEBUG_VERTS - debug_printf("Setup triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); -#endif + LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value); - if (setup->llvmpipe->no_rast) - return; - - det = calc_det(v0, v1, v2); - /* - debug_printf("%s\n", __FUNCTION__ ); - */ + if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { + setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; + setup->dirty |= LP_SETUP_NEW_FS; + } +} -#if DEBUG_FRAGS - setup->numFragsEmitted = 0; - setup->numFragsWritten = 0; -#endif +void +lp_setup_set_blend_color( struct setup_context *setup, + const struct pipe_blend_color *blend_color ) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (cull_tri( setup, det )) - return; + assert(blend_color); - if (!setup_sort_vertices( setup, det, v0, v1, v2 )) - return; - setup_tri_coefficients( setup ); - setup_tri_edges( setup ); + if(memcmp(&setup->blend_color.current, blend_color, sizeof *blend_color) != 0) { + memcpy(&setup->blend_color.current, blend_color, sizeof *blend_color); + setup->dirty |= LP_SETUP_NEW_BLEND_COLOR; + } +} - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES); - setup->span.y = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ +void +lp_setup_set_scissor( struct setup_context *setup, + const struct pipe_scissor_state *scissor ) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - /* init_constant_attribs( setup ); */ + assert(scissor); - if (setup->oneoverarea < 0.0) { - /* emaj on left: - */ - subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); - subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); 
- } - else { - /* emaj on right: - */ - subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); - subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); + if (memcmp(&setup->scissor.current, scissor, sizeof(*scissor)) != 0) { + setup->scissor.current = *scissor; /* struct copy */ + setup->dirty |= LP_SETUP_NEW_SCISSOR; } - - flush_spans( setup ); - -#if DEBUG_FRAGS - printf("Tri: %u frags emitted, %u written\n", - setup->numFragsEmitted, - setup->numFragsWritten); -#endif } - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. - */ -static void -linear_pos_coeff(struct setup_context *setup, - uint vertSlot, uint i) +void +lp_setup_set_flatshade_first( struct setup_context *setup, + boolean flatshade_first ) { - const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[0][i] = dadx; - setup->coef.dady[0][i] = dady; - setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - setup->pixel_offset) + - dady * (setup->vmin[0][1] - setup->pixel_offset))); + setup->flatshade_first = flatshade_first; } -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. 
- */ -static void -line_linear_coeff(struct setup_context *setup, - unsigned attrib, - uint vertSlot) +void +lp_setup_set_vertex_info( struct setup_context *setup, + struct vertex_info *vertex_info ) { - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - setup->pixel_offset) + - dady * (setup->vmin[0][1] - setup->pixel_offset))); - } + /* XXX: just silently holding onto the pointer: + */ + setup->vertex_info = vertex_info; } /** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a line. + * Called during state validation when LP_NEW_TEXTURE is set. */ -static void -line_persp_coeff(struct setup_context *setup, - unsigned attrib, - uint vertSlot) +void +lp_setup_set_sampler_textures( struct setup_context *setup, + unsigned num, struct pipe_texture **texture) { unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; - const float da = a1 - a0; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - setup->pixel_offset) + - dady * (setup->vmin[0][1] - setup->pixel_offset))); + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(num <= PIPE_MAX_SAMPLERS); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? 
texture[i] : NULL; + + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex; + jit_tex = &setup->fs.current.jit_context.textures[i]; + jit_tex->width = tex->width0; + jit_tex->height = tex->height0; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + else + /* FIXME: map the rendertarget */ + assert(0); + + /* the scene references this texture */ + { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + lp_scene_texture_reference(scene, tex); + } + } } + + setup->dirty |= LP_SETUP_NEW_FS; } /** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmax are initialized. + * Is the given texture referenced by any scene? + * Note: we have to check all scenes including any scenes currently + * being rendered and the current scene being built. */ -static INLINE boolean -setup_line_coefficients(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) +unsigned +lp_setup_is_texture_referenced( const struct setup_context *setup, + const struct pipe_texture *texture ) { - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - float area; - - /* use setup->vmin, vmax to point to vertices */ - if (llvmpipe->rasterizer->flatshade_first) - setup->vprovoke = v0; - else - setup->vprovoke = v1; - setup->vmin = v0; - setup->vmax = v1; - - setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; - setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; - - /* NOTE: this is not really area but something proportional to it */ - area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; - if (area == 0.0f || util_is_inf_or_nan(area)) - return FALSE; - setup->oneoverarea = 1.0f / area; - - /* z and w are done by linear interpolation: - */ - linear_pos_coeff(setup, 0, 2); - 
linear_pos_coeff(setup, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_LINEAR: - line_linear_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - line_persp_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } + unsigned i; - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } + /* check the render targets */ + for (i = 0; i < setup->fb.nr_cbufs; i++) { + if (setup->fb.cbufs[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + } + if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) { + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } - return TRUE; -} - -/** - * Plot a pixel in a line segment. 
- */ -static INLINE void -plot(struct setup_context *setup, int x, int y) -{ - const int iy = y & 1; - const int ix = x & 1; - const int quadX = x - ix; - const int quadY = y - iy; - const int mask = (1 << ix) << (2 * iy); - - if (quadX != setup->quad[0].input.x0 || - quadY != setup->quad[0].input.y0) - { - /* flush prev quad, start new quad */ - - if (setup->quad[0].input.x0 != -1) - clip_emit_quad( setup, &setup->quad[0] ); - - setup->quad[0].input.x0 = quadX; - setup->quad[0].input.y0 = quadY; - setup->quad[0].inout.mask = 0x0; + /* check textures referenced by the scene */ + for (i = 0; i < Elements(setup->scenes); i++) { + if (lp_scene_is_texture_referenced(setup->scenes[i], texture)) { + return PIPE_REFERENCED_FOR_READ; + } } - setup->quad[0].inout.mask |= mask; + return PIPE_UNREFERENCED; } /** - * Do setup for line rasterization, then render the line. - * Single-pixel width, no stipple, etc. We rely on the 'draw' module - * to handle stippling and wide lines. + * Called by vbuf code when we're about to draw something. 
*/ void -llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) +lp_setup_update_state( struct setup_context *setup ) { - int x0 = (int) v0[0][0]; - int x1 = (int) v1[0][0]; - int y0 = (int) v0[0][1]; - int y1 = (int) v1[0][1]; - int dx = x1 - x0; - int dy = y1 - y0; - int xstep, ystep; - -#if DEBUG_VERTS - debug_printf("Setup line:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); -#endif - - if (setup->llvmpipe->no_rast) - return; - - if (dx == 0 && dy == 0) - return; + struct lp_scene *scene = lp_setup_get_current_scene(setup); - if (!setup_line_coefficients(setup, v0, v1)) - return; - - assert(v0[0][0] < 1.0e9); - assert(v0[0][1] < 1.0e9); - assert(v1[0][0] < 1.0e9); - assert(v1[0][1] < 1.0e9); - - if (dx < 0) { - dx = -dx; /* make positive */ - xstep = -1; - } - else { - xstep = 1; - } + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (dy < 0) { - dy = -dy; /* make positive */ - ystep = -1; - } - else { - ystep = 1; - } + assert(setup->fs.current.jit_function); - assert(dx >= 0); - assert(dy >= 0); - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES); + if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) { + uint8_t *stored; + unsigned i, j; - setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; - setup->quad[0].inout.mask = 0x0; + stored = lp_scene_alloc_aligned(scene, 4 * 16, 16); - /* XXX temporary: set coverage to 1.0 so the line appears - * if AA mode happens to be enabled. 
- */ - setup->quad[0].input.coverage[0] = - setup->quad[0].input.coverage[1] = - setup->quad[0].input.coverage[2] = - setup->quad[0].input.coverage[3] = 1.0; - - if (dx > dy) { - /*** X-major line ***/ - int i; - const int errorInc = dy + dy; - int error = errorInc - dx; - const int errorDec = error - dx; - - for (i = 0; i < dx; i++) { - plot(setup, x0, y0); - - x0 += xstep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - y0 += ystep; - } - } - } - else { - /*** Y-major line ***/ - int i; - const int errorInc = dx + dx; - int error = errorInc - dy; - const int errorDec = error - dy; - - for (i = 0; i < dy; i++) { - plot(setup, x0, y0); - - y0 += ystep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - x0 += xstep; - } + /* smear each blend color component across 16 ubyte elements */ + for (i = 0; i < 4; ++i) { + uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); + for (j = 0; j < 16; ++j) + stored[i*16 + j] = c; } - } - /* draw final quad */ - if (setup->quad[0].inout.mask) { - clip_emit_quad( setup, &setup->quad[0] ); + setup->blend_color.stored = stored; + + setup->fs.current.jit_context.blend_color = setup->blend_color.stored; + setup->dirty |= LP_SETUP_NEW_FS; } -} + if (setup->dirty & LP_SETUP_NEW_SCISSOR) { + float *stored; -static void -point_persp_coeff(struct setup_context *setup, - const float (*vert)[4], - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for(i = 0; i < NUM_CHANNELS; ++i) { - setup->coef.dadx[1 + attrib][i] = 0.0F; - setup->coef.dady[1 + attrib][i] = 0.0F; - setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3]; - } -} + stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16); + stored[0] = (float) setup->scissor.current.minx; + stored[1] = (float) setup->scissor.current.miny; + stored[2] = (float) setup->scissor.current.maxx; + stored[3] = (float) setup->scissor.current.maxy; -/** - * Do setup for point rasterization, then render the point. 
- * Round or square points... - * XXX could optimize a lot for 1-pixel points. - */ -void -llvmpipe_setup_point( struct setup_context *setup, - const float (*v0)[4] ) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const int sizeAttr = setup->llvmpipe->psize_slot; - const float size - = sizeAttr > 0 ? v0[sizeAttr][0] - : setup->llvmpipe->rasterizer->point_size; - const float halfSize = 0.5F * size; - const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth; - const float x = v0[0][0]; /* Note: data[0] is always position */ - const float y = v0[0][1]; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - -#if DEBUG_VERTS - debug_printf("Setup point:\n"); - print_vertex(setup, v0); -#endif - - if (llvmpipe->no_rast) - return; + setup->scissor.stored = stored; - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS); - - /* For points, all interpolants are constant-valued. - * However, for point sprites, we'll need to setup texcoords appropriately. - * XXX: which coefficients are the texcoords??? - * We may do point sprites as textured quads... - * - * KW: We don't know which coefficients are texcoords - ultimately - * the choice of what interpolation mode to use for each attribute - * should be determined by the fragment program, using - * per-attribute declaration statements that include interpolation - * mode as a parameter. So either the fragment program will have - * to be adjusted for pointsprite vs normal point behaviour, or - * otherwise a special interpolation mode will have to be defined - * which matches the required behaviour for point sprites. But - - * the latter is not a feature of normal hardware, and as such - * probably should be ruled out on that basis. 
- */ - setup->vprovoke = v0; + setup->fs.current.jit_context.scissor_xmin = stored[0]; + setup->fs.current.jit_context.scissor_ymin = stored[1]; + setup->fs.current.jit_context.scissor_xmax = stored[2]; + setup->fs.current.jit_context.scissor_ymax = stored[3]; - /* setup Z, W */ - const_pos_coeff(setup, 0, 2); - const_pos_coeff(setup, 0, 3); + setup->dirty |= LP_SETUP_NEW_FS; + } - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; + if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { + struct pipe_buffer *buffer = setup->constants.current; - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - /* fall-through */ - case INTERP_LINEAR: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } + if(buffer) { + unsigned current_size = buffer->size; + const void *current_data = llvmpipe_buffer(buffer)->data; - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } - } + /* TODO: copy only the actually used constants? 
*/ + if(setup->constants.stored_size != current_size || + !setup->constants.stored_data || + memcmp(setup->constants.stored_data, + current_data, + current_size) != 0) { + void *stored; - if (halfSize <= 0.5 && !round) { - /* special case for 1-pixel points */ - const int ix = ((int) x) & 1; - const int iy = ((int) y) & 1; - setup->quad[0].input.x0 = (int) x - ix; - setup->quad[0].input.y0 = (int) y - iy; - setup->quad[0].inout.mask = (1 << ix) << (2 * iy); - clip_emit_quad( setup, &setup->quad[0] ); - } - else { - if (round) { - /* rounded points */ - const int ixmin = block((int) (x - halfSize)); - const int ixmax = block((int) (x + halfSize)); - const int iymin = block((int) (y - halfSize)); - const int iymax = block((int) (y + halfSize)); - const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ - const float rmax = halfSize + 0.7071F; - const float rmin2 = MAX2(0.0F, rmin * rmin); - const float rmax2 = rmax * rmax; - const float cscale = 1.0F / (rmax2 - rmin2); - int ix, iy; - - for (iy = iymin; iy <= iymax; iy += 2) { - for (ix = ixmin; ix <= ixmax; ix += 2) { - float dx, dy, dist2, cover; - - setup->quad[0].inout.mask = 0x0; - - dx = (ix + 0.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_TOP_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_TOP_RIGHT; - } - - dx = (ix + 0.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 
1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT; - } - - if (setup->quad[0].inout.mask) { - setup->quad[0].input.x0 = ix; - setup->quad[0].input.y0 = iy; - clip_emit_quad( setup, &setup->quad[0] ); - } + stored = lp_scene_alloc(scene, current_size); + if(stored) { + memcpy(stored, + current_data, + current_size); + setup->constants.stored_size = current_size; + setup->constants.stored_data = stored; } } } else { - /* square points */ - const int xmin = (int) (x + 0.75 - halfSize); - const int ymin = (int) (y + 0.25 - halfSize); - const int xmax = xmin + (int) size; - const int ymax = ymin + (int) size; - /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ - const int ixmin = block(xmin); - const int ixmax = block(xmax - 1); - const int iymin = block(ymin); - const int iymax = block(ymax - 1); - int ix, iy; - - /* - debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); - */ - for (iy = iymin; iy <= iymax; iy += 2) { - uint rowMask = 0xf; - if (iy < ymin) { - /* above the top edge */ - rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - } - if (iy + 1 >= ymax) { - /* below the bottom edge */ - rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); - } + setup->constants.stored_size = 0; + setup->constants.stored_data = NULL; + } - for (ix = ixmin; ix <= ixmax; ix += 2) { - uint mask = rowMask; - - if (ix < xmin) { - /* fragment is past left edge of point, turn off left bits */ - mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - } - if (ix + 1 >= xmax) { - /* past the right edge */ - mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - } - - setup->quad[0].inout.mask = mask; - setup->quad[0].input.x0 = ix; - setup->quad[0].input.y0 = iy; - clip_emit_quad( setup, &setup->quad[0] ); - } + setup->fs.current.jit_context.constants = setup->constants.stored_data; + setup->dirty |= 
LP_SETUP_NEW_FS; + } + + + if(setup->dirty & LP_SETUP_NEW_FS) { + if(!setup->fs.stored || + memcmp(setup->fs.stored, + &setup->fs.current, + sizeof setup->fs.current) != 0) { + /* The fs state that's been stored in the scene is different from + * the new, current state. So allocate a new lp_rast_state object + * and append it to the bin's setup data buffer. + */ + struct lp_rast_state *stored = + (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored); + if(stored) { + memcpy(stored, + &setup->fs.current, + sizeof setup->fs.current); + setup->fs.stored = stored; + + /* put the state-set command into all bins */ + lp_scene_bin_state_command( scene, + lp_rast_set_state, + lp_rast_arg_state(setup->fs.stored) ); } } } + + setup->dirty = 0; + + assert(setup->fs.stored); } -void llvmpipe_setup_prepare( struct setup_context *setup ) + + +/* Only caller is lp_setup_vbuf_destroy() + */ +void +lp_setup_destroy( struct setup_context *setup ) { - struct llvmpipe_context *lp = setup->llvmpipe; + reset_context( setup ); - if (lp->dirty) { - llvmpipe_update_derived(lp); - } + pipe_buffer_reference(&setup->constants.current, NULL); - if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && - lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && - lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { - /* we'll do culling */ - setup->winding = lp->rasterizer->cull_mode; - } - else { - /* 'draw' will do culling */ - setup->winding = PIPE_WINDING_NONE; + /* free the scenes in the 'empty' queue */ + while (1) { + struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes, FALSE); + if (!scene) + break; + lp_scene_destroy(scene); } -} - + lp_rast_destroy( setup->rast ); -void llvmpipe_setup_destroy_context( struct setup_context *setup ) -{ - align_free( setup ); + FREE( setup ); } /** - * Create a new primitive setup/render stage. + * Create a new primitive tiling engine. Plug it into the backend of + * the draw module. Currently also creates a rasterizer to use with + * it. 
*/ -struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ) +struct setup_context * +lp_setup_create( struct pipe_screen *screen, + struct draw_context *draw ) { - struct setup_context *setup; unsigned i; + struct setup_context *setup = CALLOC_STRUCT(setup_context); - setup = align_malloc(sizeof(struct setup_context), 16); if (!setup) return NULL; - memset(setup, 0, sizeof *setup); - setup->llvmpipe = llvmpipe; + lp_setup_init_vbuf(setup); + + setup->empty_scenes = lp_scene_queue_create(); + if (!setup->empty_scenes) + goto fail; - for (i = 0; i < MAX_QUADS; i++) { - setup->quad[i].coef = &setup->coef; + setup->rast = lp_rast_create( screen, setup->empty_scenes ); + if (!setup->rast) + goto fail; + + setup->vbuf = draw_vbuf_stage(draw, &setup->base); + if (!setup->vbuf) + goto fail; + + draw_set_rasterize_stage(draw, setup->vbuf); + draw_set_render(draw, &setup->base); + + /* create some empty scenes */ + for (i = 0; i < MAX_SCENES; i++) { + setup->scenes[i] = lp_scene_create(); + lp_scene_enqueue(setup->empty_scenes, setup->scenes[i]); } - setup->span.left[0] = 1000000; /* greater than right[0] */ - setup->span.left[1] = 1000000; /* greater than right[1] */ + setup->triangle = first_triangle; + setup->line = first_line; + setup->point = first_point; + + setup->dirty = ~0; return setup; + +fail: + if (setup->rast) + lp_rast_destroy( setup->rast ); + + if (setup->vbuf) + ; + + if (setup->empty_scenes) + lp_scene_queue_destroy(setup->empty_scenes); + + FREE(setup); + return NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 89c43da0460..0e155a7dc31 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -27,27 +27,113 @@ #ifndef LP_SETUP_H #define LP_SETUP_H -struct setup_context; -struct llvmpipe_context; +#include "pipe/p_compiler.h" +#include "lp_jit.h" + +struct draw_context; +struct vertex_info; + +enum lp_interp { + 
LP_INTERP_CONSTANT, + LP_INTERP_LINEAR, + LP_INTERP_PERSPECTIVE, + LP_INTERP_POSITION, + LP_INTERP_FACING +}; + +/* Describes how to generate all the fragment shader inputs from the + * the vertices passed into our triangle/line/point functions. + * + * Vertices are treated as an array of float[4] values, indexed by + * src_index. + */ +struct lp_shader_input { + enum lp_interp interp; /* how to interpolate values */ + unsigned src_index; /* where to find values in incoming vertices */ +}; + +struct pipe_texture; +struct pipe_surface; +struct pipe_buffer; +struct pipe_blend_color; +struct pipe_screen; +struct pipe_framebuffer_state; +struct lp_fragment_shader; +struct lp_jit_context; + +struct setup_context * +lp_setup_create( struct pipe_screen *screen, + struct draw_context *draw ); + +void +lp_setup_clear(struct setup_context *setup, + const float *clear_color, + double clear_depth, + unsigned clear_stencil, + unsigned flags); + +struct pipe_fence_handle * +lp_setup_fence( struct setup_context *setup ); + + +void +lp_setup_flush( struct setup_context *setup, + unsigned flags ); + + +void +lp_setup_bind_framebuffer( struct setup_context *setup, + const struct pipe_framebuffer_state *fb ); void -llvmpipe_setup_tri( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ); +lp_setup_set_triangle_state( struct setup_context *setup, + unsigned cullmode, + boolean front_is_ccw, + boolean scissor ); void -llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]); +lp_setup_set_fs_inputs( struct setup_context *setup, + const struct lp_shader_input *interp, + unsigned nr ); void -llvmpipe_setup_point( struct setup_context *setup, - const float (*v0)[4] ); +lp_setup_set_fs_functions( struct setup_context *setup, + lp_jit_frag_func jit_function0, + lp_jit_frag_func jit_function1, + boolean opaque ); +void +lp_setup_set_fs_constants(struct setup_context *setup, + struct pipe_buffer *buffer); 
+ + +void +lp_setup_set_alpha_ref_value( struct setup_context *setup, + float alpha_ref_value ); + +void +lp_setup_set_blend_color( struct setup_context *setup, + const struct pipe_blend_color *blend_color ); + +void +lp_setup_set_scissor( struct setup_context *setup, + const struct pipe_scissor_state *scissor ); + +void +lp_setup_set_sampler_textures( struct setup_context *setup, + unsigned num, struct pipe_texture **texture); + +unsigned +lp_setup_is_texture_referenced( const struct setup_context *setup, + const struct pipe_texture *texture ); + +void +lp_setup_set_flatshade_first( struct setup_context *setup, + boolean flatshade_first ); + +void +lp_setup_set_vertex_info( struct setup_context *setup, + struct vertex_info *info ); -struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ); -void llvmpipe_setup_prepare( struct setup_context *setup ); -void llvmpipe_setup_destroy_context( struct setup_context *setup ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h new file mode 100644 index 00000000000..a5fc34e54a2 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -0,0 +1,159 @@ +/************************************************************************** + * + * Copyright 2007-2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * The setup code is concerned with point/line/triangle setup and + * putting commands/data into the bins. + */ + + +#ifndef LP_SETUP_CONTEXT_H +#define LP_SETUP_CONTEXT_H + +#include "lp_setup.h" +#include "lp_rast.h" +#include "lp_tile_soa.h" /* for TILE_SIZE */ +#include "lp_scene.h" + +#include "draw/draw_vbuf.h" + +#define LP_SETUP_NEW_FS 0x01 +#define LP_SETUP_NEW_CONSTANTS 0x02 +#define LP_SETUP_NEW_BLEND_COLOR 0x04 +#define LP_SETUP_NEW_SCISSOR 0x08 + + +struct lp_scene_queue; + + +/** Max number of scenes */ +#define MAX_SCENES 2 + + + +/** + * Point/line/triangle setup context. + * Note: "stored" below indicates data which is stored in the bins, + * not arbitrary malloc'd memory. 
+ * + * + * Subclass of vbuf_render, plugged directly into the draw module as + * the rendering backend. + */ +struct setup_context +{ + struct vbuf_render base; + + struct vertex_info *vertex_info; + uint prim; + uint vertex_size; + uint nr_vertices; + uint vertex_buffer_size; + void *vertex_buffer; + + /* Final pipeline stage for draw module. Draw module should + * create/install this itself now. + */ + struct draw_stage *vbuf; + struct lp_rasterizer *rast; + struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */ + struct lp_scene *scene; /**< current scene being built */ + struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ + + boolean flatshade_first; + boolean ccw_is_frontface; + boolean scissor_test; + unsigned cullmode; + + struct pipe_framebuffer_state fb; + + struct { + unsigned flags; + union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */ + union lp_rast_cmd_arg zstencil; /**< lp_rast_clear_zstencil() cmd */ + } clear; + + enum { + SETUP_FLUSHED, + SETUP_CLEARED, + SETUP_ACTIVE + } state; + + struct { + struct lp_shader_input input[PIPE_MAX_ATTRIBS]; + unsigned nr_inputs; + + const struct lp_rast_state *stored; /**< what's in the scene */ + struct lp_rast_state current; /**< currently set state */ + } fs; + + /** fragment shader constants */ + struct { + struct pipe_buffer *current; + unsigned stored_size; + const void *stored_data; + } constants; + + struct { + struct pipe_blend_color current; + uint8_t *stored; + } blend_color; + + struct { + struct pipe_scissor_state current; + const void *stored; + } scissor; + + unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ + + void (*point)( struct setup_context *, + const float (*v0)[4]); + + void (*line)( struct setup_context *, + const float (*v0)[4], + const float (*v1)[4]); + + void (*triangle)( struct setup_context *, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]); +}; + +void lp_setup_choose_triangle( struct setup_context *setup ); +void 
lp_setup_choose_line( struct setup_context *setup ); +void lp_setup_choose_point( struct setup_context *setup ); + +struct lp_scene *lp_setup_get_current_scene(struct setup_context *setup); + +void lp_setup_init_vbuf(struct setup_context *setup); + +void lp_setup_update_state( struct setup_context *setup ); + +void lp_setup_destroy( struct setup_context *setup ); + +#endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.h b/src/gallium/drivers/llvmpipe/lp_setup_line.c index d91cd35b3b7..feea79d3943 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,28 +22,26 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * + * **************************************************************************/ -#ifndef TGSI_DUMP_C_H -#define TGSI_DUMP_C_H +/* + * Binning code for lines + */ -#include "pipe/p_shader_tokens.h" +#include "lp_setup_context.h" -#if defined __cplusplus -extern "C" { -#endif - -#define TGSI_DUMP_C_IGNORED 1 -#define TGSI_DUMP_C_DEFAULT 2 +static void line_nop( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4] ) +{ +} -void -tgsi_dump_c( - const struct tgsi_token *tokens, - uint flags ); -#if defined __cplusplus +void +lp_setup_choose_line( struct setup_context *setup ) +{ + setup->line = line_nop; } -#endif -#endif /* TGSI_DUMP_C_H */ + diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 0676e2f42ac..f03ca729b24 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
@@ -22,17 +22,25 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -#ifndef LP_VBUF_H -#define LP_VBUF_H +/* + * Binning code for points + */ +#include "lp_setup_context.h" -struct llvmpipe_context; +static void point_nop( struct setup_context *setup, + const float (*v0)[4] ) +{ +} -extern struct vbuf_render * -lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe); + +void +lp_setup_choose_point( struct setup_context *setup ) +{ + setup->point = point_nop; +} -#endif /* LP_VBUF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c new file mode 100644 index 00000000000..9e59a6602cc --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -0,0 +1,618 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for triangles + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_rast.h" + +#define NUM_CHANNELS 4 + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * Stores \p value in a0 and zeroes dadx/dady for channel \p i of input + * slot \p slot. + */ +static void constant_coef( struct lp_rast_triangle *tri, + unsigned slot, + const float value, + unsigned i ) +{ + tri->inputs.a0[slot][i] = value; + tri->inputs.dadx[slot][i] = 0.0f; + tri->inputs.dady[slot][i] = 0.0f; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + * Reads channel \p i of vertex attribute \p vert_attr from the three + * vertices and writes channel \p i of input slot \p slot. \p oneoverarea + * is the scale factor supplied by the caller. + */ +static void linear_coef( struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). 
+ * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up losing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial + * Z and W are interpolated linearly from vertex attribute 0 (the position) + * by calling linear_coef() on channels 2 and 3. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
+ */ +static void +setup_fragcoord_coef(struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4]) +{ + /*X*/ + tri->inputs.a0[slot][0] = 0.0; + tri->inputs.dadx[slot][0] = 1.0; + tri->inputs.dady[slot][0] = 0.0; + /*Y*/ + tri->inputs.a0[slot][1] = 0.0; + tri->inputs.dadx[slot][1] = 0.0; + tri->inputs.dady[slot][1] = 1.0; + /*Z*/ + linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2); + /*W*/ + linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3); +} + + +/** + * Coefficient setup for the face-orientation input. + * Channel 0 of \p slot holds the constant 1.0f - frontface; the other + * three channels are set to zero. + */ +static void setup_facing_coef( struct lp_rast_triangle *tri, + unsigned slot, + boolean frontface ) +{ + constant_coef( tri, slot, 1.0f - frontface, 0 ); + constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ +} + + +/** + * Compute the tri->inputs a0, dadx and dady values for the position + * (slot zero) and for every fragment shader input (slots 1..nr_inputs). + */ +static void setup_tri_coefficients( struct setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontface) +{ + unsigned slot; + + /* The internal position input is in slot zero: + */ + setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); + + /* setup interpolation for all the remaining attributes: + * (shader input N is stored in coefficient slot N+1) + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned i; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_CONSTANT: + /* constant inputs take their value from v3 */ + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(tri, slot+1, v3[vert_attr][i], i); + break; + + case 
LP_INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + break; + + case LP_INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + break; + + case LP_INTERP_POSITION: + /* XXX: fix me - duplicates the values in slot 
zero. + */ + setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3); + break; + + case LP_INTERP_FACING: + setup_facing_coef(tri, slot+1, frontface); + break; + + default: + assert(0); + } + } +} + + + +static INLINE int subpixel_snap( float a ) +{ + return util_iround(FIXED_ONE * a - (FIXED_ONE / 2)); +} + + + +/** + * Alloc space for a new triangle plus the input.a0/dadx/dady arrays + * immediately after it. + * The memory is allocated from the per-scene pool, not per-tile. + * \param tri_size returns number of bytes allocated + * \param nr_inputs number of fragment shader inputs + * \return pointer to triangle space + */ +static INLINE struct lp_rast_triangle * +alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) +{ + unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + struct lp_rast_triangle *tri; + unsigned bytes; + char *inputs; + + assert(sizeof(*tri) % 16 == 0); + + bytes = sizeof(*tri) + (3 * input_array_sz); + + tri = lp_scene_alloc_aligned( scene, bytes, 16 ); + + inputs = (char *) (tri + 1); + tri->inputs.a0 = (float (*)[4]) inputs; + tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); + tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + + *tri_size = bytes; + + return tri; +} + + + +/** + * Do basic setup for triangle rasterization and determine which + * framebuffer tiles are touched. Put the triangle in the scene's + * bins for the tiles which we overlap. 
+ */
+static void
+do_triangle_ccw(struct setup_context *setup,
+                const float (*v1)[4],
+                const float (*v2)[4],
+                const float (*v3)[4],
+                boolean frontfacing )
+{
+   /* x/y positions in fixed point */
+   const int x1 = subpixel_snap(v1[0][0]);
+   const int x2 = subpixel_snap(v2[0][0]);
+   const int x3 = subpixel_snap(v3[0][0]);
+   const int y1 = subpixel_snap(v1[0][1]);
+   const int y2 = subpixel_snap(v2[0][1]);
+   const int y3 = subpixel_snap(v3[0][1]);
+
+   struct lp_scene *scene = lp_setup_get_current_scene(setup);
+   struct lp_rast_triangle *tri;
+   int area;
+   float oneoverarea;
+   int minx, maxx, miny, maxy;
+   unsigned tri_bytes;
+
+   tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes);
+
+   tri->dx12 = x1 - x2;
+   tri->dx23 = x2 - x3;
+   tri->dx31 = x3 - x1;
+
+   tri->dy12 = y1 - y2;
+   tri->dy23 = y2 - y3;
+   tri->dy31 = y3 - y1;
+
+   area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12);
+
+   LP_COUNT(nr_tris);
+
+   /* Cull non-ccw and zero-sized triangles.
+    *
+    * XXX: subject to overflow??
+    */
+   if (area <= 0) {
+      lp_scene_putback_data( scene, tri_bytes );
+      LP_COUNT(nr_culled_tris);
+      return;
+   }
+
+   /* Bounding rectangle (in pixels) */
+   minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+   maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+   miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+   maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+
+   if (setup->scissor_test) {
+      minx = MAX2(minx, setup->scissor.current.minx);
+      maxx = MIN2(maxx, setup->scissor.current.maxx);
+      miny = MAX2(miny, setup->scissor.current.miny);
+      maxy = MIN2(maxy, setup->scissor.current.maxy);
+   }
+
+   /* Reject empty bounding boxes.  Use >= rather than ==: after the
+    * scissor clamp above min can end up greater than max when the
+    * triangle lies entirely outside the scissor rectangle.
+    */
+   if (miny >= maxy ||
+       minx >= maxx) {
+      lp_scene_putback_data( scene, tri_bytes );
+      LP_COUNT(nr_culled_tris);
+      return;
+   }
+
+   /* The edge deltas and area are in fixed-point units, hence the
+    * FIXED_ONE numerator.
+    */
+   oneoverarea = ((float)FIXED_ONE) / (float)area;
+
+   /* Setup parameter interpolants:
+    */
+   setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing );
+
+   /* half-edge constants, will be iterated over the whole render target.
+    */
+   tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
+   tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
+   tri->c3 = tri->dy31 * x3 - tri->dx31 * y3;
+
+   /* correct for top-left fill convention:
+    */
+   if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++;
+   if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++;
+   if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++;
+
+   tri->dy12 *= FIXED_ONE;
+   tri->dy23 *= FIXED_ONE;
+   tri->dy31 *= FIXED_ONE;
+
+   tri->dx12 *= FIXED_ONE;
+   tri->dx23 *= FIXED_ONE;
+   tri->dx31 *= FIXED_ONE;
+
+   /* find trivial reject offsets for each edge for a single-pixel
+    * sized block.  These will be scaled up at each recursive level to
+    * match the active blocksize.  Scaling in this way works best if
+    * the blocks are square.
+    */
+   tri->eo1 = 0;
+   if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
+   if (tri->dx12 > 0) tri->eo1 += tri->dx12;
+
+   tri->eo2 = 0;
+   if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
+   if (tri->dx23 > 0) tri->eo2 += tri->dx23;
+
+   tri->eo3 = 0;
+   if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
+   if (tri->dx31 > 0) tri->eo3 += tri->dx31;
+
+   /* Calculate trivial accept offsets from the above.
+    */
+   tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
+   tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
+   tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
+
+   /* Fill in the inputs.step[][] arrays.
+    * We've manually unrolled some loops here.
+    */
+   {
+      const int xstep1 = -tri->dy12;
+      const int xstep2 = -tri->dy23;
+      const int xstep3 = -tri->dy31;
+      const int ystep1 = tri->dx12;
+      const int ystep2 = tri->dx23;
+      const int ystep3 = tri->dx31;
+
+#define SETUP_STEP(i, x, y)                                    \
+   do {                                                        \
+      tri->inputs.step[0][i] = (x) * xstep1 + (y) * ystep1;    \
+      tri->inputs.step[1][i] = (x) * xstep2 + (y) * ystep2;    \
+      tri->inputs.step[2][i] = (x) * xstep3 + (y) * ystep3;    \
+   } while (0)
+
+      SETUP_STEP(0, 0, 0);
+      SETUP_STEP(1, 1, 0);
+      SETUP_STEP(2, 0, 1);
+      SETUP_STEP(3, 1, 1);
+
+      SETUP_STEP(4, 2, 0);
+      SETUP_STEP(5, 3, 0);
+      SETUP_STEP(6, 2, 1);
+      SETUP_STEP(7, 3, 1);
+
+      SETUP_STEP(8, 0, 2);
+      SETUP_STEP(9, 1, 2);
+      SETUP_STEP(10, 0, 3);
+      SETUP_STEP(11, 1, 3);
+
+      SETUP_STEP(12, 2, 2);
+      SETUP_STEP(13, 3, 2);
+      SETUP_STEP(14, 2, 3);
+      SETUP_STEP(15, 3, 3);
+      /* undefine the macro actually defined above (was "#undef STEP") */
+#undef SETUP_STEP
+   }
+
+   /*
+    * All fields of 'tri' are now set.  The remaining code here is
+    * concerned with binning.
+    */
+
+   /* Convert to tile coordinates:
+    */
+   minx = minx / TILE_SIZE;
+   miny = miny / TILE_SIZE;
+   maxx = maxx / TILE_SIZE;
+   maxy = maxy / TILE_SIZE;
+
+   /* Clamp maxx, maxy to framebuffer size
+    */
+   maxx = MIN2(maxx, scene->tiles_x - 1);
+   maxy = MIN2(maxy, scene->tiles_y - 1);
+
+   /* Determine which tile(s) intersect the triangle's bounding box
+    */
+   if (miny == maxy && minx == maxx)
+   {
+      /* Triangle is contained in a single tile:
+       */
+      lp_scene_bin_command( scene, minx, miny, lp_rast_triangle,
+                            lp_rast_arg_triangle(tri) );
+   }
+   else
+   {
+      int c1 = (tri->c1 +
+                tri->dx12 * miny * TILE_SIZE -
+                tri->dy12 * minx * TILE_SIZE);
+      int c2 = (tri->c2 +
+                tri->dx23 * miny * TILE_SIZE -
+                tri->dy23 * minx * TILE_SIZE);
+      int c3 = (tri->c3 +
+                tri->dx31 * miny * TILE_SIZE -
+                tri->dy31 * minx * TILE_SIZE);
+
+      int ei1 = tri->ei1 << TILE_ORDER;
+      int ei2 = tri->ei2 << TILE_ORDER;
+      int ei3 = tri->ei3 << TILE_ORDER;
+
+      int eo1 = tri->eo1 << TILE_ORDER;
+      int eo2 = tri->eo2 << TILE_ORDER;
+      int eo3 = tri->eo3 << TILE_ORDER;
+
+      int xstep1 = -(tri->dy12 << TILE_ORDER);
+      int xstep2 = -(tri->dy23 << TILE_ORDER);
+      int xstep3 = -(tri->dy31 << TILE_ORDER);
+
+      int ystep1 = tri->dx12 << TILE_ORDER;
+      int ystep2 = tri->dx23 << TILE_ORDER;
+      int ystep3 = tri->dx31 << TILE_ORDER;
+      int x, y;
+
+
+      /* Test tile-sized blocks against the triangle.
+       * Discard blocks fully outside the tri.  If the block is fully
+       * contained inside the tri, bin an lp_rast_shade_tile command.
+       * Else, bin a lp_rast_triangle command.
+       */
+      for (y = miny; y <= maxy; y++)
+      {
+         int cx1 = c1;
+         int cx2 = c2;
+         int cx3 = c3;
+         boolean in = FALSE;  /* are we inside the triangle? */
+
+         for (x = minx; x <= maxx; x++)
+         {
+            if (cx1 + eo1 < 0 ||
+                cx2 + eo2 < 0 ||
+                cx3 + eo3 < 0)
+            {
+               /* do nothing */
+               LP_COUNT(nr_empty_64);
+               if (in)
+                  break;  /* exiting triangle, all done with this row */
+            }
+            else if (cx1 + ei1 > 0 &&
+                     cx2 + ei2 > 0 &&
+                     cx3 + ei3 > 0)
+            {
+               /* triangle covers the whole tile- shade whole tile */
+               LP_COUNT(nr_fully_covered_64);
+               in = TRUE;
+               if(setup->fs.current.opaque) {
+                  lp_scene_bin_reset( scene, x, y );
+                  lp_scene_bin_command( scene, x, y,
+                                        lp_rast_set_state,
+                                        lp_rast_arg_state(setup->fs.stored) );
+               }
+               lp_scene_bin_command( scene, x, y,
+                                     lp_rast_shade_tile,
+                                     lp_rast_arg_inputs(&tri->inputs) );
+            }
+            else
+            {
+               /* rasterizer/shade partial tile */
+               LP_COUNT(nr_partially_covered_64);
+               in = TRUE;
+               lp_scene_bin_command( scene, x, y,
+                                     lp_rast_triangle,
+                                     lp_rast_arg_triangle(tri) );
+            }
+
+            /* Iterate cx values across the region:
+             */
+            cx1 += xstep1;
+            cx2 += xstep2;
+            cx3 += xstep3;
+         }
+
+         /* Iterate c values down the region:
+          */
+         c1 += ystep1;
+         c2 += ystep2;
+         c3 += ystep3;
+      }
+   }
+}
+
+
+/**
+ * Set up a triangle with the first two vertices swapped, so that
+ * do_triangle_ccw() sees the opposite winding.
+ */
+static void triangle_cw( struct setup_context *setup,
+                         const float (*v0)[4],
+                         const float (*v1)[4],
+                         const float (*v2)[4] )
+{
+   do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface );
+}
+
+
+/**
+ * Set up a triangle with its vertices in the order given.
+ */
+static void triangle_ccw( struct setup_context *setup,
+                          const float (*v0)[4],
+                          const float (*v1)[4],
+                          const float (*v2)[4] )
+{
+   do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
+}
+
+
+/**
+ * Set up a triangle of either winding: the sign of the determinant
+ * selects triangle_ccw() or triangle_cw().
+ */
+static void triangle_both( struct setup_context *setup,
+                           const float (*v0)[4],
+                           const float (*v1)[4],
+                           const float (*v2)[4] )
+{
+   /* edge vectors e = v0 - v2, f = v1 - v2 */
+   const float ex = v0[0][0] - v2[0][0];
+   const float ey = v0[0][1] - v2[0][1];
+   const float fx = v1[0][0] - v2[0][0];
+   const float fy = v1[0][1] - v2[0][1];
+
+   /* det = cross(e,f).z */
+   if (ex * fy - ey * fx < 0.0f)
+      triangle_ccw( setup, v0, v1, v2 );
+   else
+      triangle_cw( setup, v0, v1, v2 );
+}
+
+
+static void triangle_nop( struct
setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ +} + + +void +lp_setup_choose_triangle( struct setup_context *setup ) +{ + switch (setup->cullmode) { + case PIPE_WINDING_NONE: + setup->triangle = triangle_both; + break; + case PIPE_WINDING_CCW: + setup->triangle = triangle_cw; + break; + case PIPE_WINDING_CW: + setup->triangle = triangle_ccw; + break; + default: + setup->triangle = triangle_nop; + break; + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c new file mode 100644 index 00000000000..24291da91e4 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -0,0 +1,518 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Interface between 'draw' module's output and the llvmpipe rasterizer/setup
+ * code.  When the 'draw' module has finished filling a vertex buffer, the
+ * draw_arrays() functions below will be called.  Loop over the vertices and
+ * call the point/line/tri setup functions.
+ *
+ * Authors
+ *  Brian Paul
+ */
+
+
+#include "lp_setup_context.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "util/u_memory.h"
+
+
+#define LP_MAX_VBUF_INDEXES 1024
+#define LP_MAX_VBUF_SIZE    4096
+
+
+
+/** cast wrapper */
+static struct setup_context *
+setup_context(struct vbuf_render *vbr)
+{
+   return (struct setup_context *) vbr;
+}
+
+
+
+/** Return the vertex layout currently stored in the setup context. */
+static const struct vertex_info *
+lp_setup_get_vertex_info(struct vbuf_render *vbr)
+{
+   struct setup_context *setup = setup_context(vbr);
+   return setup->vertex_info;
+}
+
+
+/**
+ * Make sure the vertex buffer can hold nr_vertices vertices of
+ * vertex_size bytes each, growing it if necessary.
+ * \return TRUE if a vertex buffer is available, FALSE otherwise
+ */
+static boolean
+lp_setup_allocate_vertices(struct vbuf_render *vbr,
+                           ushort vertex_size, ushort nr_vertices)
+{
+   struct setup_context *setup = setup_context(vbr);
+   /* widen before multiplying: ushort * ushort is evaluated as int */
+   unsigned size = (unsigned) vertex_size * nr_vertices;
+
+   if (setup->vertex_buffer_size < size) {
+      align_free(setup->vertex_buffer);
+      setup->vertex_buffer = align_malloc(size, 16);
+      /* Only record the new capacity if the allocation succeeded, so a
+       * later (smaller) request retries instead of reusing a NULL buffer.
+       */
+      setup->vertex_buffer_size = setup->vertex_buffer ? size : 0;
+   }
+
+   setup->vertex_size = vertex_size;
+   setup->nr_vertices = nr_vertices;
+
+   return setup->vertex_buffer != NULL;
+}
+
+static void
+lp_setup_release_vertices(struct vbuf_render *vbr)
+{
+   /* keep the old allocation for next time */
+}
+
+/** Return the vertex buffer for the draw module to fill. */
+static void *
+lp_setup_map_vertices(struct vbuf_render *vbr)
+{
+   struct setup_context *setup = setup_context(vbr);
+   return setup->vertex_buffer;
+}
+
+static void
+lp_setup_unmap_vertices(struct vbuf_render *vbr,
+                        ushort min_index,
+                        ushort max_index )
+{
+   struct setup_context *setup = setup_context(vbr);
+   assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size );
+   /* do nothing */
+}
+
+
+/** Remember the primitive type for the following draw calls. */
+static boolean
+lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim)
+{
+   setup_context(vbr)->prim = prim;
+   return TRUE;
+}
+
+typedef const float (*const_float4_ptr)[4];
+
+/** Return a pointer to vertex 'index' in a buffer of 'stride'-byte vertices. */
+static INLINE const_float4_ptr get_vert( const void *vertex_buffer,
+                                         int index,
+                                         int stride )
+{
+   return (const_float4_ptr)((char *)vertex_buffer + index * stride);
+}
+
+/**
+ * draw elements / indexed primitives
+ *
+ * Decomposes the current primitive type into points, lines and triangles
+ * and passes them to the setup->point/line/triangle callbacks.
+ * \param indices  vertex indices into the vertex buffer
+ * \param nr  number of indices
+ */
+static void
+lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
+{
+   struct setup_context *setup = setup_context(vbr);
+   const unsigned stride = setup->vertex_info->size * sizeof(float);
+   const void *vertex_buffer = setup->vertex_buffer;
+   unsigned i;
+
+   lp_setup_update_state(setup);
+
+   switch (setup->prim) {
+   case PIPE_PRIM_POINTS:
+      for (i = 0; i < nr; i++) {
+         setup->point( setup,
+                       get_vert(vertex_buffer, indices[i-0], stride) );
+      }
+      break;
+
+   case PIPE_PRIM_LINES:
+      for (i = 1; i < nr; i += 2) {
+         setup->line( setup,
+                      get_vert(vertex_buffer, indices[i-1], stride),
+                      get_vert(vertex_buffer, indices[i-0], stride) );
+      }
+      break;
+
+   case PIPE_PRIM_LINE_STRIP:
+      for (i = 1; i < nr; i ++) {
+         setup->line( setup,
+                      get_vert(vertex_buffer, indices[i-1], stride),
+                      get_vert(vertex_buffer, indices[i-0], stride) );
+      }
+      break;
+
+   case PIPE_PRIM_LINE_LOOP:
+      for (i = 1; i < nr; i ++) {
+         setup->line( setup,
+                      get_vert(vertex_buffer, indices[i-1], stride),
+                      get_vert(vertex_buffer, indices[i-0], stride) );
+      }
+      if (nr) {
+         setup->line( setup,
+                      get_vert(vertex_buffer, indices[nr-1], stride),
+                      get_vert(vertex_buffer, indices[0], stride) );
+      }
+      break;
+
+   case PIPE_PRIM_TRIANGLES:
+      if (setup->flatshade_first) {
+         for (i = 2; i < nr; i += 3) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-1], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride),
+                             get_vert(vertex_buffer, indices[i-2], stride) );
+         }
+      }
+      else {
+         for (i = 2; i < nr; i += 3) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-2], stride),
+                             get_vert(vertex_buffer, indices[i-1], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride) );
+         }
+      }
+      break;
+
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      if (setup->flatshade_first) {
+         for (i = 2; i < nr; i += 1) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i+(i&1)-1], stride),
+                             get_vert(vertex_buffer, indices[i-(i&1)], stride),
+                             get_vert(vertex_buffer, indices[i-2], stride) );
+         }
+      }
+      else {
+         for (i = 2; i < nr; i += 1) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i+(i&1)-2], stride),
+                             get_vert(vertex_buffer, indices[i-(i&1)-1], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride) );
+         }
+      }
+      break;
+
+   case PIPE_PRIM_TRIANGLE_FAN:
+      if (setup->flatshade_first) {
+         for (i = 2; i < nr; i += 1) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-0], stride),
+                             get_vert(vertex_buffer, indices[0], stride),
+                             get_vert(vertex_buffer, indices[i-1], stride) );
+         }
+      }
+      else {
+         for (i = 2; i < nr; i += 1) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[0], stride),
+                             get_vert(vertex_buffer, indices[i-1], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride) );
+         }
+      }
+      break;
+
+   case PIPE_PRIM_QUADS:
+      if (setup->flatshade_first) {
+         for (i = 3; i < nr; i += 4) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-2], stride),
+                             get_vert(vertex_buffer, indices[i-1], stride),
+                             get_vert(vertex_buffer, indices[i-3], stride) );
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-1], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride),
+                             get_vert(vertex_buffer, indices[i-3], stride) );
+         }
+      }
+      else {
+         for (i = 3; i < nr; i += 4) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-3], stride),
+                             get_vert(vertex_buffer, indices[i-2], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride) );
+
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-2], stride),
+                             get_vert(vertex_buffer, indices[i-1], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride) );
+         }
+      }
+      break;
+
+   case PIPE_PRIM_QUAD_STRIP:
+      if (setup->flatshade_first) {
+         for (i = 3; i < nr; i += 2) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-0], stride),
+                             get_vert(vertex_buffer, indices[i-1], stride),
+                             get_vert(vertex_buffer, indices[i-3], stride));
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-2], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride),
+                             get_vert(vertex_buffer, indices[i-3], stride) );
+         }
+      }
+      else {
+         for (i = 3; i < nr; i += 2) {
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-3], stride),
+                             get_vert(vertex_buffer, indices[i-2], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride) );
+            setup->triangle( setup,
+                             get_vert(vertex_buffer, indices[i-1], stride),
+                             get_vert(vertex_buffer, indices[i-3], stride),
+                             get_vert(vertex_buffer, indices[i-0], stride) );
+         }
+      }
+      break;
+
+   case PIPE_PRIM_POLYGON:
+      /* Almost same as tri fan but the _first_ vertex specifies the flat
+       * shading color.  Note that the first polygon vertex is passed as
+       * the last triangle vertex here.
+       * flatshade_first state makes no difference.
+       *
+       * The vertex order (i-1, i, 0) is a rotation of the fan order
+       * (0, i-1, i), so the winding is preserved; it also matches the
+       * non-indexed path in lp_setup_draw_arrays().
+       */
+      for (i = 2; i < nr; i += 1) {
+         setup->triangle( setup,
+                          get_vert(vertex_buffer, indices[i-1], stride),
+                          get_vert(vertex_buffer, indices[i-0], stride),
+                          get_vert(vertex_buffer, indices[0], stride) );
+      }
+      break;
+
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * This function is hit when the draw module is working in pass-through mode.
+ * It's up to us to convert the vertex array into point/line/tri prims.
+ */ +static void +lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +{ + struct setup_context *setup = setup_context(vbr); + const unsigned stride = setup->vertex_info->size * sizeof(float); + const void *vertex_buffer = + (void *) get_vert(setup->vertex_buffer, start, stride); + unsigned i; + + lp_setup_update_state(setup); + + switch (setup->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup->point( setup, + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + if (nr) { + setup->line( setup, + get_vert(vertex_buffer, nr-1, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + case PIPE_PRIM_TRIANGLES: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-2, stride) ); + } + } + else { + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (setup->flatshade_first) { + for (i = 2; i < nr; i++) { + setup->triangle( setup, + get_vert(vertex_buffer, i+(i&1)-1, stride), + get_vert(vertex_buffer, i-(i&1), stride), + get_vert(vertex_buffer, i-2, stride) ); + } + } + else { + for (i = 2; i < nr; i++) { + setup->triangle( setup, + get_vert(vertex_buffer, i+(i&1)-2, stride), + 
get_vert(vertex_buffer, i-(i&1)-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (setup->flatshade_first) { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride) ); + } + } + else { + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_QUADS: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride) ); + } + } + else { + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + if (setup->flatshade_first) { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride) ); + setup->triangle( setup, + + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride) ); + } + } + else { + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, 
stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_POLYGON: + /* Almost same as tri fan but the _first_ vertex specifies the flat + * shading color. Note that the first polygon vertex is passed as + * the last triangle vertex here. + * flatshade_first state makes no difference. + */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + default: + assert(0); + } +} + + + +static void +lp_setup_vbuf_destroy(struct vbuf_render *vbr) +{ + lp_setup_destroy(setup_context(vbr)); +} + + +/** + * Create the post-transform vertex handler for the given context. + */ +void +lp_setup_init_vbuf(struct setup_context *setup) +{ + setup->base.max_indices = LP_MAX_VBUF_INDEXES; + setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; + + setup->base.get_vertex_info = lp_setup_get_vertex_info; + setup->base.allocate_vertices = lp_setup_allocate_vertices; + setup->base.map_vertices = lp_setup_map_vertices; + setup->base.unmap_vertices = lp_setup_unmap_vertices; + setup->base.set_primitive = lp_setup_set_primitive; + setup->base.draw = lp_setup_draw; + setup->base.draw_arrays = lp_setup_draw_arrays; + setup->base.release_vertices = lp_setup_release_vertices; + setup->base.destroy = lp_setup_vbuf_destroy; +} diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 7020da145f3..8f68f12bed7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -36,7 +36,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" -#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */ +#include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ #define LP_NEW_VIEWPORT 0x1 @@ -54,6 +54,7 @@ #define LP_NEW_VERTEX 0x1000 #define LP_NEW_VS 0x2000 #define LP_NEW_QUERY 0x4000 +#define 
LP_NEW_BLEND_COLOR 0x8000 struct vertex_info; @@ -65,11 +66,18 @@ struct lp_fragment_shader; struct lp_fragment_shader_variant_key { - enum pipe_format zsbuf_format; struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; - + enum pipe_format zsbuf_format; + unsigned nr_cbufs:8; + unsigned flatshade:1; + unsigned scissor:1; + + struct { + ubyte colormask; + } cbuf_blend[PIPE_MAX_COLOR_BUFS]; + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; @@ -80,9 +88,9 @@ struct lp_fragment_shader_variant struct lp_fragment_shader_variant_key key; - LLVMValueRef function; + LLVMValueRef function[2]; - lp_jit_frag_func jit_function; + lp_jit_frag_func jit_function[2]; struct lp_fragment_shader_variant *next; }; @@ -154,7 +162,7 @@ void llvmpipe_set_clip_state( struct pipe_context *, void llvmpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf); + struct pipe_buffer *buf); void *llvmpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); @@ -212,23 +220,10 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void -llvmpipe_map_transfers(struct llvmpipe_context *lp); - -void -llvmpipe_unmap_transfers(struct llvmpipe_context *lp); - -void llvmpipe_map_texture_surfaces(struct llvmpipe_context *lp); void llvmpipe_unmap_texture_surfaces(struct llvmpipe_context *lp); -struct vertex_info * -llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe); - -struct vertex_info * -llvmpipe_get_vbuf_vertex_info(struct llvmpipe_context *llvmpipe); - - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index a94cd05ef20..9b950e82d89 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -73,7 +73,9 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, const struct pipe_blend_color 
*blend_color ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - unsigned i, j; + + if(!blend_color) + return; if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) return; @@ -82,13 +84,7 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); - if(!llvmpipe->jit_context.blend_color) - llvmpipe->jit_context.blend_color = align_malloc(4 * 16, 16); - for (i = 0; i < 4; ++i) { - uint8_t c = float_to_ubyte(blend_color->color[i]); - for (j = 0; j < 16; ++j) - llvmpipe->jit_context.blend_color[i*16 + j] = c; - } + llvmpipe->dirty |= LP_NEW_BLEND_COLOR; } @@ -117,9 +113,6 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, llvmpipe->depth_stencil = depth_stencil; - if(llvmpipe->depth_stencil) - llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value; - llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 6c1ef6bc42d..bdd906e1a73 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -33,166 +33,113 @@ #include "draw/draw_private.h" #include "lp_context.h" #include "lp_screen.h" -#include "lp_tex_cache.h" +#include "lp_setup.h" #include "lp_state.h" -/** - * Mark the current vertex layout as "invalid". - * We'll validate the vertex layout later, when we start to actually - * render a point or line or tri. - */ -static void -invalidate_vertex_layout(struct llvmpipe_context *llvmpipe) -{ - llvmpipe->vertex_info.num_attribs = 0; -} - /** * The vertex info describes how to convert the post-transformed vertices * (simple float[][4]) used by the 'draw' module into vertices for * rasterization. * - * This function validates the vertex layout and returns a pointer to a - * vertex_info object. + * This function validates the vertex layout. 
*/ -struct vertex_info * -llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) +static void +compute_vertex_info(struct llvmpipe_context *llvmpipe) { + const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo = &llvmpipe->vertex_info; + const uint num = draw_num_shader_outputs(llvmpipe->draw); + uint i; - if (vinfo->num_attribs == 0) { - /* compute vertex layout now */ - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_current_shader_outputs(llvmpipe->draw); - uint i; - - /* Tell draw_vbuf to simply emit the whole post-xform vertex - * as-is. No longer any need to try and emit draw vertex_header - * info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + /* Tell setup to tell the draw module to simply emit the whole + * post-xform vertex as-is. + * + * Not really sure if this is the best approach. + */ + vinfo->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, i); + } + draw_compute_vertex_size(vinfo); - /* - * Loop over fragment shader inputs, searching for the matching output - * from the vertex shader. - */ - vinfo->num_attribs = 0; - for (i = 0; i < lpfs->info.num_inputs; i++) { - int src; - enum interp_mode interp; - switch (lpfs->info.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - interp = INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - interp = INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - interp = INTERP_PERSPECTIVE; - break; - default: - assert(0); - interp = INTERP_LINEAR; - } + lp_setup_set_vertex_info(llvmpipe->setup, vinfo); + +/* + llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); +*/ + + /* Now match FS inputs against emitted vertex data. 
It's also + * entirely possible to just have a fixed layout for FS input, + * determined by the fragment shader itself, and adjust the draw + * outputs to match that. + */ + { + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; + for (i = 0; i < lpfs->info.num_inputs; i++) { + + /* This can be precomputed, except for flatshade: + */ switch (lpfs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_FACE: + inputs[i].interp = LP_INTERP_FACING; + break; case TGSI_SEMANTIC_POSITION: - interp = INTERP_POS; + inputs[i].interp = LP_INTERP_POSITION; break; - case TGSI_SEMANTIC_COLOR: - if (llvmpipe->rasterizer->flatshade) { - interp = INTERP_CONSTANT; - } + /* Colors are linearly interpolated in the fragment shader + * even when flatshading is active. This just tells the + * setup module to use coefficients with ddx==0 and + * ddy==0. + */ + if (llvmpipe->rasterizer->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_LINEAR; break; - } - /* this includes texcoords and varying vars */ - src = draw_find_shader_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - } + default: + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + inputs[i].interp = LP_INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + inputs[i].interp = LP_INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + default: + assert(0); + break; + } + } - llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw, - TGSI_SEMANTIC_PSIZE, 0); - if (llvmpipe->psize_slot > 0) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, - llvmpipe->psize_slot); + /* Search for each input in current vs output: + */ + inputs[i].src_index = + draw_find_shader_output(llvmpipe->draw, + lpfs->info.input_semantic_name[i], + lpfs->info.input_semantic_index[i]); } - 
draw_compute_vertex_size(vinfo); + lp_setup_set_fs_inputs(llvmpipe->setup, + inputs, + lpfs->info.num_inputs); } - - return vinfo; } /** - * Called from vbuf module. + * Handle state changes. + * Called just prior to drawing anything (pipe::draw_arrays(), etc). * - * Note that there's actually two different vertex layouts in llvmpipe. - * - * The normal one is computed in llvmpipe_get_vertex_info() above and is - * used by the point/line/tri "setup" code. - * - * The other one (this one) is only used by the vbuf module (which is - * not normally used by default but used in testing). For the vbuf module, - * we basically want to pass-through the draw module's vertex layout as-is. - * When the llvmpipe vbuf code begins drawing, the normal vertex layout - * will come into play again. - */ -struct vertex_info * -llvmpipe_get_vbuf_vertex_info(struct llvmpipe_context *llvmpipe) -{ - (void) llvmpipe_get_vertex_info(llvmpipe); - return &llvmpipe->vertex_info_vbuf; -} - - -/** - * Recompute cliprect from scissor bounds, scissor enable and surface size. - */ -static void -compute_cliprect(struct llvmpipe_context *lp) -{ - /* LP_NEW_FRAMEBUFFER - */ - uint surfWidth = lp->framebuffer.width; - uint surfHeight = lp->framebuffer.height; - - /* LP_NEW_RASTERIZER - */ - if (lp->rasterizer->scissor) { - - /* LP_NEW_SCISSOR - * - * clip to scissor rect: - */ - lp->cliprect.minx = MAX2(lp->scissor.minx, 0); - lp->cliprect.miny = MAX2(lp->scissor.miny, 0); - lp->cliprect.maxx = MIN2(lp->scissor.maxx, surfWidth); - lp->cliprect.maxy = MIN2(lp->scissor.maxy, surfHeight); - } - else { - /* clip to surface bounds */ - lp->cliprect.minx = 0; - lp->cliprect.miny = 0; - lp->cliprect.maxx = surfWidth; - lp->cliprect.maxy = surfHeight; - } -} - - -/* Hopefully this will remain quite simple, otherwise need to pull in + * Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. 
*/ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) @@ -206,28 +153,40 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) llvmpipe->dirty |= LP_NEW_TEXTURE; } - if (llvmpipe->dirty & (LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) { - /* TODO */ - } - if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | LP_NEW_VS)) - invalidate_vertex_layout( llvmpipe ); - - if (llvmpipe->dirty & (LP_NEW_SCISSOR | - LP_NEW_RASTERIZER | - LP_NEW_FRAMEBUFFER)) - compute_cliprect(llvmpipe); + compute_vertex_info( llvmpipe ); if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | + LP_NEW_SCISSOR | LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_RASTERIZER | LP_NEW_SAMPLER | LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); + if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) + lp_setup_set_blend_color(llvmpipe->setup, + &llvmpipe->blend_color); + + if (llvmpipe->dirty & LP_NEW_SCISSOR) + lp_setup_set_scissor(llvmpipe->setup, &llvmpipe->scissor); + + if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) + lp_setup_set_alpha_ref_value(llvmpipe->setup, + llvmpipe->depth_stencil->alpha.ref_value); + + if (llvmpipe->dirty & LP_NEW_CONSTANTS) + lp_setup_set_fs_constants(llvmpipe->setup, + llvmpipe->constants[PIPE_SHADER_FRAGMENT]); + + if (llvmpipe->dirty & LP_NEW_TEXTURE) + lp_setup_set_sampler_textures(llvmpipe->setup, + llvmpipe->num_textures, + llvmpipe->texture); llvmpipe->dirty = 0; } + diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index b73ca2d41ed..15c10d8e2e4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -31,6 +31,8 @@ * Code generate the whole fragment pipeline. 
* * The fragment pipeline consists of the following stages: + * - triangle edge in/out testing + * - scissor test * - stipple (TBI) * - early depth test * - fragment shader @@ -58,36 +60,39 @@ * @author Jose Fonseca <[email protected]> */ +#include <limits.h> #include "pipe/p_defines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_format.h" #include "util/u_debug_dump.h" -#include "pipe/internal/p_winsys_screen.h" +#include "os/os_time.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_parse.h" -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_conv.h" -#include "lp_bld_intr.h" -#include "lp_bld_logic.h" -#include "lp_bld_depth.h" -#include "lp_bld_interp.h" -#include "lp_bld_tgsi.h" -#include "lp_bld_alpha.h" -#include "lp_bld_blend.h" -#include "lp_bld_swizzle.h" -#include "lp_bld_flow.h" -#include "lp_bld_debug.h" -#include "lp_screen.h" -#include "lp_context.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_depth.h" +#include "gallivm/lp_bld_interp.h" +#include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_alpha.h" +#include "gallivm/lp_bld_blend.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" #include "lp_buffer.h" +#include "lp_context.h" +#include "lp_debug.h" +#include "lp_perf.h" +#include "lp_screen.h" +#include "lp_setup.h" #include "lp_state.h" -#include "lp_quad.h" #include "lp_tex_sample.h" -#include "lp_debug.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -187,7 +192,187 @@ generate_depth(LLVMBuilderRef builder, /** + * Generate the code to do inside/outside triangle testing for the + * four pixels in a 2x2 quad. 
This will set the four elements of the + * quad mask vector to 0 or ~0. + * \param i which quad of the quad group to test, in [0,3] + */ +static void +generate_tri_edge_mask(LLVMBuilderRef builder, + unsigned i, + LLVMValueRef *mask, /* ivec4, out */ + LLVMValueRef c0, /* int32 */ + LLVMValueRef c1, /* int32 */ + LLVMValueRef c2, /* int32 */ + LLVMValueRef step0_ptr, /* ivec4 */ + LLVMValueRef step1_ptr, /* ivec4 */ + LLVMValueRef step2_ptr) /* ivec4 */ +{ +#define OPTIMIZE_IN_OUT_TEST 0 +#if OPTIMIZE_IN_OUT_TEST + struct lp_build_if_state ifctx; + LLVMValueRef not_draw_all; +#endif + struct lp_build_flow_context *flow; + struct lp_type i32_type; + LLVMTypeRef i32vec4_type, mask_type; + LLVMValueRef c0_vec, c1_vec, c2_vec; + LLVMValueRef in_out_mask; + + assert(i < 4); + + /* int32 vector type */ + memset(&i32_type, 0, sizeof i32_type); + i32_type.floating = FALSE; /* values are integers */ + i32_type.sign = TRUE; /* values are signed */ + i32_type.norm = FALSE; /* values are not normalized */ + i32_type.width = 32; /* 32-bit int values */ + i32_type.length = 4; /* 4 elements per vector */ + + i32vec4_type = lp_build_int32_vec4_type(); + + mask_type = LLVMIntType(32 * 4); + + /* + * Use a conditional here to do detailed pixel in/out testing. + * We only have to do this if c0 != INT_MIN. + */ + flow = lp_build_flow_create(builder); + lp_build_flow_scope_begin(flow); + + { +#if OPTIMIZE_IN_OUT_TEST + /* not_draw_all = (c0 != INT_MIN) */ + not_draw_all = LLVMBuildICmp(builder, + LLVMIntNE, + c0, + LLVMConstInt(LLVMInt32Type(), INT_MIN, 0), + ""); + + in_out_mask = lp_build_int_const_scalar(i32_type, ~0); + + + lp_build_flow_scope_declare(flow, &in_out_mask); + + /* if (not_draw_all) {... 
*/ + lp_build_if(&ifctx, flow, builder, not_draw_all); +#endif + { + LLVMValueRef step0_vec, step1_vec, step2_vec; + LLVMValueRef m0_vec, m1_vec, m2_vec; + LLVMValueRef index, m; + + /* c0_vec = {c0, c0, c0, c0} + * Note that we emit this code four times but LLVM optimizes away + * three instances of it. + */ + c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); + c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); + c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); + lp_build_name(c0_vec, "edgeconst0vec"); + lp_build_name(c1_vec, "edgeconst1vec"); + lp_build_name(c2_vec, "edgeconst2vec"); + + /* load step0vec, step1, step2 vec from memory */ + index = LLVMConstInt(LLVMInt32Type(), i, 0); + step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); + step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); + step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + lp_build_name(step0_vec, "step0vec"); + lp_build_name(step1_vec, "step1vec"); + lp_build_name(step2_vec, "step2vec"); + + /* m0_vec = step0_ptr[i] > c0_vec */ + m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); + m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); + m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); + + /* in_out_mask = m0_vec & m1_vec & m2_vec */ + m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); + in_out_mask = LLVMBuildAnd(builder, m, m2_vec, ""); + lp_build_name(in_out_mask, "inoutmaskvec"); + } +#if OPTIMIZE_IN_OUT_TEST + lp_build_endif(&ifctx); +#endif + + } + lp_build_flow_scope_end(flow); + lp_build_flow_destroy(flow); + + /* This is the initial alive/dead pixel mask for a quad of four pixels. + * It's an int[4] vector with each word set to 0 or ~0. + * Words will get cleared when pixels fail the Z test, etc. 
+ */ + *mask = in_out_mask; +} + + +static LLVMValueRef +generate_scissor_test(LLVMBuilderRef builder, + LLVMValueRef context_ptr, + const struct lp_build_interp_soa_context *interp, + struct lp_type type) +{ + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1]; + LLVMValueRef xmin, ymin, xmax, ymax; + LLVMValueRef m0, m1, m2, m3, m; + + /* xpos, ypos contain the window coords for the four pixels in the quad */ + assert(xpos); + assert(ypos); + + /* get the current scissor bounds, convert to vectors */ + xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr); + xmin = lp_build_broadcast(builder, vec_type, xmin); + + ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr); + ymin = lp_build_broadcast(builder, vec_type, ymin); + + xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr); + xmax = lp_build_broadcast(builder, vec_type, xmax); + + ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr); + ymax = lp_build_broadcast(builder, vec_type, ymax); + + /* compare the fragment's position coordinates against the scissor bounds */ + m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin); + m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin); + m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax); + m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax); + + /* AND all the masks together */ + m = LLVMBuildAnd(builder, m0, m1, ""); + m = LLVMBuildAnd(builder, m, m2, ""); + m = LLVMBuildAnd(builder, m, m3, ""); + + lp_build_name(m, "scissormask"); + + return m; +} + + +static LLVMValueRef +build_int32_vec_const(int value) +{ + struct lp_type i32_type; + + memset(&i32_type, 0, sizeof i32_type); + i32_type.floating = FALSE; /* values are integers */ + i32_type.sign = TRUE; /* values are signed */ + i32_type.norm = FALSE; /* values are not normalized */ + i32_type.width = 32; /* 32-bit int values */ + i32_type.length = 4; /* 4 elements per 
vector */ + return lp_build_int_const_scalar(i32_type, value); +} + + + +/** * Generate the fragment shader, depth/stencil test, and alpha tests. + * \param i which quad in the tile, in range [0,3] + * \param do_tri_test if 1, do triangle edge in/out testing */ static void generate_fs(struct llvmpipe_context *lp, @@ -200,8 +385,15 @@ generate_fs(struct llvmpipe_context *lp, const struct lp_build_interp_soa_context *interp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, - LLVMValueRef *color, - LLVMValueRef depth_ptr) + LLVMValueRef (*color)[4], + LLVMValueRef depth_ptr, + unsigned do_tri_test, + LLVMValueRef c0, + LLVMValueRef c1, + LLVMValueRef c2, + LLVMValueRef step0_ptr, + LLVMValueRef step1_ptr, + LLVMValueRef step2_ptr) { const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef elem_type; @@ -215,6 +407,9 @@ generate_fs(struct llvmpipe_context *lp, boolean early_depth_test; unsigned attrib; unsigned chan; + unsigned cbuf; + + assert(i < 4); elem_type = lp_build_elem_type(type); vec_type = lp_build_vec_type(type); @@ -229,14 +424,32 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_begin(flow); /* Declare the color and z variables */ - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - color[chan] = LLVMGetUndef(vec_type); - lp_build_flow_scope_declare(flow, &color[chan]); + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + color[cbuf][chan] = LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, &color[cbuf][chan]); + } } lp_build_flow_scope_declare(flow, &z); + /* do triangle edge testing */ + if (do_tri_test) { + generate_tri_edge_mask(builder, i, pmask, + c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + } + else { + *pmask = build_int32_vec_const(~0); + } + + /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); + if (key->scissor) { + LLVMValueRef smask = + generate_scissor_test(builder, context_ptr, 
interp, type); + lp_build_mask_update(&mask, smask); + } + early_depth_test = key->depth.enabled && !key->alpha.enabled && @@ -255,19 +468,21 @@ generate_fs(struct llvmpipe_context *lp, for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(outputs[attrib][chan]) { - lp_build_name(outputs[attrib][chan], "output%u.%u.%c", i, attrib, "xyzw"[chan]); + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]); switch (shader->info.output_semantic_name[attrib]) { case TGSI_SEMANTIC_COLOR: { unsigned cbuf = shader->info.output_semantic_index[attrib]; - lp_build_name(outputs[attrib][chan], "color%u.%u.%c", i, attrib, "rgba"[chan]); + lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); /* Alpha test */ /* XXX: should the alpha reference value be passed separately? */ + /* XXX: should only test the final assignment to alpha */ if(cbuf == 0 && chan == 3) { - LLVMValueRef alpha = outputs[attrib][chan]; + LLVMValueRef alpha = out; LLVMValueRef alpha_ref_value; alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); @@ -275,15 +490,13 @@ generate_fs(struct llvmpipe_context *lp, &mask, alpha, alpha_ref_value); } - if(cbuf == 0) - color[chan] = outputs[attrib][chan]; - + color[cbuf][chan] = out; break; } case TGSI_SEMANTIC_POSITION: if(chan == 2) - z = outputs[attrib][chan]; + z = out; break; } } @@ -332,6 +545,8 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); flow = lp_build_flow_create(builder); + + /* we'll use this mask context to skip blending if all pixels are dead */ lp_build_mask_begin(&mask_ctx, flow, type, mask); vec_type = lp_build_vec_type(type); @@ -354,7 +569,7 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_blend_soa(builder, blend, type, src, dst, con, res); for(chan 
= 0; chan < 4; ++chan) { - if(blend->colormask & (1 << chan)) { + if(blend->rt[0].colormask & (1 << chan)) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); lp_build_name(res[chan], "res.%c", "rgba"[chan]); res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); @@ -369,14 +584,18 @@ generate_blend(const struct pipe_blend_state *blend, /** * Generate the runtime callable function for the whole fragment pipeline. + * Note that the function which we generate operates on a block of 16 + * pixels at a time. The block contains 2x2 quads. Each quad contains + * 2x2 pixels. */ -static struct lp_fragment_shader_variant * +static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, - const struct lp_fragment_shader_variant_key *key) + struct lp_fragment_shader_variant *variant, + unsigned do_tri_test) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); - struct lp_fragment_shader_variant *variant; + const struct lp_fragment_shader_variant_key *key = &variant->key; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; @@ -384,17 +603,18 @@ generate_fragment(struct llvmpipe_context *lp, LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; - LLVMTypeRef arg_types[9]; + LLVMTypeRef arg_types[14]; LLVMTypeRef func_type; + LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; - LLVMValueRef mask_ptr; - LLVMValueRef color_ptr; + LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; + LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; @@ -402,71 +622,15 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef 
fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; + LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; + LLVMValueRef function; unsigned num_fs; unsigned i; unsigned chan; + unsigned cbuf; - if (LP_DEBUG & DEBUG_JIT) { - tgsi_dump(shader->base.tokens, 0); - if(key->depth.enabled) { - debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); - debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); - debug_printf("depth.writemask = %u\n", key->depth.writemask); - } - if(key->alpha.enabled) { - debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); - debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); - } - if(key->blend.logicop_enable) { - debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); - } - else if(key->blend.blend_enable) { - debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); - } - debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); - for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if(key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - debug_printf(" .format = %s\n", - pf_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - debug_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s 
%s\n", - debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) - debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); - } - } - } - - variant = CALLOC_STRUCT(lp_fragment_shader_variant); - if(!variant) - return NULL; - - variant->shader = shader; - memcpy(&variant->key, key, sizeof *key); /* TODO: actually pick these based on the fs and color buffer * characteristics. 
*/ @@ -476,8 +640,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ - fs_type.length = 4; /* 4 element per vector */ - num_fs = 4; + fs_type.length = 4; /* 4 elements per vector */ + num_fs = 4; /* number of quads per block */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ @@ -504,27 +668,47 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ - arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */ - arg_types[7] = LLVMPointerType(blend_vec_type, 0); /* color */ - arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ + arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[8] = LLVMInt32Type(); /* c0 */ + arg_types[9] = LLVMInt32Type(); /* c1 */ + arg_types[10] = LLVMInt32Type(); /* c2 */ + /* Note: the step arrays are built as int32[16] but we interpret + * them here as int32_vec4[4]. + */ + arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ + arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ + arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - variant->function = LLVMAddFunction(screen->module, "shader", func_type); - LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + function = LLVMAddFunction(screen->module, "shader", func_type); + LLVMSetFunctionCallConv(function, LLVMCCallConv); + + variant->function[do_tri_test] = function; + + + /* XXX: need to propagate noalias down into color param now we are + * passing a pointer-to-pointer? 
+ */ for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); - - context_ptr = LLVMGetParam(variant->function, 0); - x = LLVMGetParam(variant->function, 1); - y = LLVMGetParam(variant->function, 2); - a0_ptr = LLVMGetParam(variant->function, 3); - dadx_ptr = LLVMGetParam(variant->function, 4); - dady_ptr = LLVMGetParam(variant->function, 5); - mask_ptr = LLVMGetParam(variant->function, 6); - color_ptr = LLVMGetParam(variant->function, 7); - depth_ptr = LLVMGetParam(variant->function, 8); + LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(function, 0); + x = LLVMGetParam(function, 1); + y = LLVMGetParam(function, 2); + a0_ptr = LLVMGetParam(function, 3); + dadx_ptr = LLVMGetParam(function, 4); + dady_ptr = LLVMGetParam(function, 5); + color_ptr_ptr = LLVMGetParam(function, 6); + depth_ptr = LLVMGetParam(function, 7); + c0 = LLVMGetParam(function, 8); + c1 = LLVMGetParam(function, 9); + c2 = LLVMGetParam(function, 10); + step0_ptr = LLVMGetParam(function, 11); + step1_ptr = LLVMGetParam(function, 12); + step2_ptr = LLVMGetParam(function, 13); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -532,36 +716,45 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); - lp_build_name(mask_ptr, "mask"); - lp_build_name(color_ptr, "color"); + lp_build_name(color_ptr_ptr, "color_ptr"); lp_build_name(depth_ptr, "depth"); + lp_build_name(c0, "c0"); + lp_build_name(c1, "c1"); + lp_build_name(c2, "c2"); + lp_build_name(step0_ptr, "step0"); + lp_build_name(step1_ptr, "step1"); + lp_build_name(step2_ptr, "step2"); /* * Function body */ - block = LLVMAppendBasicBlock(variant->function, "entry"); + block = LLVMAppendBasicBlock(function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, 
block); generate_pos0(builder, x, y, &x0, &y0); - lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type, + lp_build_interp_soa_init(&interp, + shader->base.tokens, + key->flatshade, + builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, - x0, y0, 2, 0); + x0, y0); /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); + /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef out_color[NUM_CHANNELS]; + LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; + int cbuf; if(i != 0) - lp_build_interp_soa_update(&interp); + lp_build_interp_soa_update(&interp, i); - fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), ""); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, @@ -571,71 +764,162 @@ generate_fragment(struct llvmpipe_context *lp, i, &interp, sampler, - &fs_mask[i], + &fs_mask[i], /* output */ out_color, - depth_ptr_i); - - for(chan = 0; chan < NUM_CHANNELS; ++chan) - fs_out_color[chan][i] = out_color[chan]; + depth_ptr_i, + do_tri_test, + c0, c1, c2, + step0_ptr, step1_ptr, step2_ptr); + + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) + for(chan = 0; chan < NUM_CHANNELS; ++chan) + fs_out_color[cbuf][chan][i] = out_color[cbuf][chan]; } sampler->destroy(sampler); - /* - * Convert the fs's output color and mask to fit to the blending type. + /* Loop over color outputs / color buffers to do blending. */ + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + LLVMValueRef color_ptr; + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - lp_build_conv(builder, fs_type, blend_type, - fs_out_color[chan], num_fs, - &blend_in_color[chan], 1); - lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]); + /* + * Convert the fs's output color and mask to fit to the blending type. 
+ */ + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + lp_build_conv(builder, fs_type, blend_type, + fs_out_color[cbuf][chan], num_fs, + &blend_in_color[chan], 1); + lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); + } + lp_build_conv_mask(builder, fs_type, blend_type, + fs_mask, num_fs, + &blend_mask, 1); + + color_ptr = LLVMBuildLoad(builder, + LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), + ""); + lp_build_name(color_ptr, "color_ptr%d", cbuf); + + /* + * Blending. + */ + generate_blend(&key->blend, + builder, + blend_type, + context_ptr, + blend_mask, + blend_in_color, + color_ptr); } - lp_build_conv_mask(builder, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); - - /* - * Blending. - */ - - generate_blend(&key->blend, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr); - LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); - /* - * Translate the LLVM IR into machine code. - */ + /* Verify the LLVM IR. If invalid, dump and abort */ #ifdef DEBUG - if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { - LLVMDumpValue(variant->function); - assert(0); + if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { + if (1) + LLVMDumpValue(function); + abort(); } #endif - LLVMRunFunctionPassManager(screen->pass, variant->function); + /* Apply optimizations to LLVM IR */ + if (1) + LLVMRunFunctionPassManager(screen->pass, function); if (LP_DEBUG & DEBUG_JIT) { - LLVMDumpValue(variant->function); + /* Print the LLVM IR to stderr */ + LLVMDumpValue(function); debug_printf("\n"); } - variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); + /* + * Translate the LLVM IR into machine code. 
+ */ + variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); if (LP_DEBUG & DEBUG_ASM) - lp_disassemble(variant->jit_function); + lp_disassemble(variant->jit_function[do_tri_test]); +} + + +static struct lp_fragment_shader_variant * +generate_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, + const struct lp_fragment_shader_variant_key *key) +{ + struct lp_fragment_shader_variant *variant; + + if (LP_DEBUG & DEBUG_JIT) { + unsigned i; + tgsi_dump(shader->base.tokens, 0); + if(key->depth.enabled) { + debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); + debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); + debug_printf("depth.writemask = %u\n", key->depth.writemask); + } + if(key->alpha.enabled) { + debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); + debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); + } + if(key->blend.logicop_enable) { + debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); + } + else if(key->blend.rt[0].blend_enable) { + debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rt[0].rgb_func, TRUE)); + debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE)); + debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE)); + debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.rt[0].alpha_func, TRUE)); + debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE)); + debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); + } + debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if(key->sampler[i].format) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + pf_name(key->sampler[i].format)); + 
debug_printf(" .target = %s\n", + debug_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + } + } + } + + variant = CALLOC_STRUCT(lp_fragment_shader_variant); + if(!variant) + return NULL; + + variant->shader = shader; + memcpy(&variant->key, key, sizeof *key); + + generate_fragment(lp, shader, variant, 0); + generate_fragment(lp, shader, variant, 1); + + /* insert new variant into linked list */ variant->next = shader->variants; shader->variants = variant; @@ -693,11 +977,15 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) variant = shader->variants; while(variant) { struct lp_fragment_shader_variant *next = variant->next; - - if(variant->function) { - if(variant->jit_function) - LLVMFreeMachineCodeForFunction(screen->engine, variant->function); - LLVMDeleteFunction(variant->function); + unsigned i; + + for (i = 0; i < Elements(variant->function); i++) { + if (variant->function[i]) { + if (variant->jit_function[i]) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function[i]); + LLVMDeleteFunction(variant->function[i]); + } } FREE(variant); @@ -714,27 +1002,25 @@ 
llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *constants) + struct pipe_buffer *constants) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct pipe_buffer *buffer = constants ? constants->buffer : NULL; - unsigned size = buffer ? buffer->size : 0; - const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; + unsigned size = constants ? constants->size : 0; + const void *data = constants ? llvmpipe_buffer(constants)->data : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + if(llvmpipe->constants[shader] == constants) + return; + draw_flush(llvmpipe->draw); /* note: reference counting */ - pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); - - if(shader == PIPE_SHADER_FRAGMENT) { - llvmpipe->jit_context.constants = data; - } + pipe_buffer_reference(&llvmpipe->constants[shader], constants); if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, + draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, 0, data, size); } @@ -769,21 +1055,30 @@ make_variant_key(struct llvmpipe_context *lp, key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ - if(lp->framebuffer.cbufs[0]) { - const struct util_format_description *format_desc; - unsigned chan; + key->flatshade = lp->rasterizer->flatshade; + key->scissor = lp->rasterizer->scissor; + if (lp->framebuffer.nr_cbufs) { memcpy(&key->blend, lp->blend, sizeof key->blend); + } - format_desc = util_format_description(lp->framebuffer.cbufs[0]->format); + key->nr_cbufs = lp->framebuffer.nr_cbufs; + for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { + const struct util_format_description *format_desc; + unsigned chan; + + format_desc = util_format_description(lp->framebuffer.cbufs[i]->format); assert(format_desc->layout == 
UTIL_FORMAT_COLORSPACE_RGB || format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); - /* mask out color channels not present in the color buffer */ + /* mask out color channels not present in the color buffer. + * Should be simple to incorporate per-cbuf writemasks: + */ for(chan = 0; chan < 4; ++chan) { enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - if(swizzle > 4) - key->blend.colormask &= ~(1 << chan); + + if(swizzle <= UTIL_FORMAT_SWIZZLE_W) + key->blend.rt[0].colormask |= (1 << chan); } } @@ -793,12 +1088,17 @@ make_variant_key(struct llvmpipe_context *lp, } +/** + * Update fragment state. This is called just prior to drawing + * something when some fragment-related state has changed. + */ void llvmpipe_update_fs(struct llvmpipe_context *lp) { struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; + boolean opaque; make_variant_key(lp, shader, &key); @@ -810,8 +1110,34 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) variant = variant->next; } - if(!variant) - variant = generate_fragment(lp, shader, &key); + if (!variant) { + int64_t t0, t1; + int64_t dt; + t0 = os_time_get(); + + variant = generate_variant(lp, shader, &key); + + t1 = os_time_get(); + dt = t1 - t0; + LP_COUNT_ADD(llvm_compile_time, dt); + LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */ + } shader->current = variant; + + /* TODO: put this in the variant */ + /* TODO: most of these can be relaxed, in particular the colormask */ + opaque = !key.blend.logicop_enable && + !key.blend.rt[0].blend_enable && + key.blend.rt[0].colormask == 0xf && + !key.alpha.enabled && + !key.depth.enabled && + !key.scissor && + !shader->info.uses_kill + ? 
TRUE : FALSE; + + lp_setup_set_fs_functions(lp->setup, + shader->current->jit_function[0], + shader->current->jit_function[1], + opaque); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index aa3b5a3f91e..feb012816c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -29,6 +29,7 @@ #include "util/u_memory.h" #include "lp_context.h" #include "lp_state.h" +#include "lp_setup.h" #include "draw/draw_context.h" @@ -53,6 +54,17 @@ void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, llvmpipe->rasterizer = rasterizer; + /* Note: we can immediately set the triangle state here and + * not worry about binning because we handle culling during + * triangle setup, not when rasterizing the bins. + */ + if (llvmpipe->rasterizer) { + lp_setup_set_triangle_state( llvmpipe->setup, + llvmpipe->rasterizer->cull_mode, + llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW, + llvmpipe->rasterizer->scissor); + } + llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index d382f9ca87e..b30a0757768 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -29,6 +29,7 @@ * Brian Paul */ +#include "util/u_inlines.h" #include "util/u_memory.h" #include "draw/draw_context.h" @@ -36,8 +37,6 @@ #include "lp_context.h" #include "lp_context.h" #include "lp_state.h" -#include "lp_texture.h" -#include "lp_tex_cache.h" #include "draw/draw_context.h" @@ -125,17 +124,6 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, struct pipe_texture *tex = i < num ? 
texture[i] : NULL; pipe_texture_reference(&llvmpipe->texture[i], tex); - lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex); - - if(tex) { - struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); - struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; - jit_tex->width = tex->width0; - jit_tex->height = tex->height0; - jit_tex->stride = lp_tex->stride[0]; - if(!lp_tex->dt) - jit_tex->data = lp_tex->data; - } } llvmpipe->num_textures = num; @@ -166,7 +154,6 @@ llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe, struct pipe_texture *tex = i < num_textures ? textures[i] : NULL; pipe_texture_reference(&llvmpipe->vertex_textures[i], tex); - lp_tex_tile_cache_set_texture(llvmpipe->vertex_tex_cache[i], tex); } llvmpipe->num_vertex_textures = num_textures; diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index e37ff04f3df..048ac5b968b 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -28,10 +28,12 @@ /* Authors: Keith Whitwell <[email protected]> */ +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" #include "lp_context.h" #include "lp_state.h" -#include "lp_surface.h" -#include "lp_tile_cache.h" +#include "lp_setup.h" #include "draw/draw_context.h" @@ -39,54 +41,19 @@ /** - * XXX this might get moved someday * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. - * Here, we flush the old surfaces and update the tile cache to point to the new - * surfaces. 
*/ void llvmpipe_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct llvmpipe_context *lp = llvmpipe_context(pipe); - uint i; - draw_flush(lp->draw); + boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb); - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - /* check if changing cbuf */ - if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { - /* flush old */ - lp_tile_cache_map_transfers(lp->cbuf_cache[i]); - lp_flush_tile_cache(lp->cbuf_cache[i]); + if (changed) { - /* assign new */ - pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]); - - /* update cache */ - lp_tile_cache_set_surface(lp->cbuf_cache[i], fb->cbufs[i]); - } - } - - lp->framebuffer.nr_cbufs = fb->nr_cbufs; - - /* zbuf changing? */ - if (lp->framebuffer.zsbuf != fb->zsbuf) { - - if(lp->zsbuf_transfer) { - struct pipe_screen *screen = pipe->screen; - - if(lp->zsbuf_map) { - screen->transfer_unmap(screen, lp->zsbuf_transfer); - lp->zsbuf_map = NULL; - } - - screen->tex_transfer_destroy(lp->zsbuf_transfer); - lp->zsbuf_transfer = NULL; - } - - /* assign new */ - pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf); + util_copy_framebuffer_state(&lp->framebuffer, fb); /* Tell draw module how deep the Z/depth buffer is */ if (lp->framebuffer.zsbuf) { @@ -103,10 +70,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } draw_set_mrd(lp->draw, mrd); } - } - lp->framebuffer.width = fb->width; - lp->framebuffer.height = fb->height; + lp_setup_bind_framebuffer( lp->setup, &lp->framebuffer ); - lp->dirty |= LP_NEW_FRAMEBUFFER; + lp->dirty |= LP_NEW_FRAMEBUFFER; + } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 1a17631a4c6..57ac25ea0cb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -31,7 +31,6 @@ #include "lp_context.h" #include "lp_state.h" -#include "lp_surface.h" #include 
"draw/draw_context.h" diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 39d80726e65..ca0f737b295 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -53,7 +53,7 @@ #include "util/u_math.h" #include "util/u_debug_dump.h" -#include "lp_bld_type.h" +#include "gallivm/lp_bld_type.h" #define LP_TEST_NUM_SAMPLES 32 diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 29fff91981a..e49b7055982 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -37,10 +37,9 @@ */ -#include "lp_bld_type.h" -#include "lp_bld_arit.h" -#include "lp_bld_blend.h" -#include "lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_blend.h" +#include "gallivm/lp_bld_debug.h" #include "lp_test.h" @@ -104,18 +103,18 @@ write_tsv_row(FILE *fp, fprintf(fp, "%s\t%s\t%s\t", - blend->rgb_func != blend->alpha_func ? "true" : "false", - blend->rgb_src_factor != blend->alpha_src_factor ? "true" : "false", - blend->rgb_dst_factor != blend->alpha_dst_factor ? "true" : "false"); + blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false", + blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false", + blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? 
"true" : "false"); fprintf(fp, "%s\t%s\t%s\t%s\t%s\t%s\n", - debug_dump_blend_func(blend->rgb_func, TRUE), - debug_dump_blend_factor(blend->rgb_src_factor, TRUE), - debug_dump_blend_factor(blend->rgb_dst_factor, TRUE), - debug_dump_blend_func(blend->alpha_func, TRUE), - debug_dump_blend_factor(blend->alpha_src_factor, TRUE), - debug_dump_blend_factor(blend->alpha_dst_factor, TRUE)); + debug_dump_blend_func(blend->rt[0].rgb_func, TRUE), + debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), + debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), + debug_dump_blend_func(blend->rt[0].alpha_func, TRUE), + debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), + debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); fflush(fp); } @@ -137,12 +136,12 @@ dump_blend_type(FILE *fp, fprintf(fp, " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", - "rgb_func", debug_dump_blend_func(blend->rgb_func, TRUE), - "rgb_src_factor", debug_dump_blend_factor(blend->rgb_src_factor, TRUE), - "rgb_dst_factor", debug_dump_blend_factor(blend->rgb_dst_factor, TRUE), - "alpha_func", debug_dump_blend_func(blend->alpha_func, TRUE), - "alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE), - "alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE)); + "rgb_func", debug_dump_blend_func(blend->rt[0].rgb_func, TRUE), + "rgb_src_factor", debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), + "rgb_dst_factor", debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), + "alpha_func", debug_dump_blend_func(blend->rt[0].alpha_func, TRUE), + "alpha_src_factor", debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), + "alpha_dst_factor", debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); fprintf(fp, " ...\n"); fflush(fp); @@ -401,13 +400,15 @@ compute_blend_ref(const struct pipe_blend_state *blend, double src_term[4]; double dst_term[4]; - compute_blend_ref_term(blend->rgb_src_factor, blend->alpha_src_factor, src, src, dst, 
con, src_term); - compute_blend_ref_term(blend->rgb_dst_factor, blend->alpha_dst_factor, dst, src, dst, con, dst_term); + compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor, + src, src, dst, con, src_term); + compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor, + dst, src, dst, con, dst_term); /* * Combine RGB terms */ - switch (blend->rgb_func) { + switch (blend->rt[0].rgb_func) { case PIPE_BLEND_ADD: ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */ ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */ @@ -440,7 +441,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, /* * Combine A terms */ - switch (blend->alpha_func) { + switch (blend->rt[0].alpha_func) { case PIPE_BLEND_ADD: ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */ break; @@ -462,7 +463,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -531,11 +532,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -596,11 +597,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t 
con[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; @@ -806,14 +807,14 @@ test_all(unsigned verbose, FILE *fp) continue; memset(&blend, 0, sizeof blend); - blend.blend_enable = 1; - blend.rgb_func = *rgb_func; - blend.rgb_src_factor = *rgb_src_factor; - blend.rgb_dst_factor = *rgb_dst_factor; - blend.alpha_func = *alpha_func; - blend.alpha_src_factor = *alpha_src_factor; - blend.alpha_dst_factor = *alpha_dst_factor; - blend.colormask = PIPE_MASK_RGBA; + blend.rt[0].blend_enable = 1; + blend.rt[0].rgb_func = *rgb_func; + blend.rt[0].rgb_src_factor = *rgb_src_factor; + blend.rt[0].rgb_dst_factor = *rgb_dst_factor; + blend.rt[0].alpha_func = *alpha_func; + blend.rt[0].alpha_src_factor = *alpha_src_factor; + blend.rt[0].alpha_dst_factor = *alpha_dst_factor; + blend.rt[0].colormask = PIPE_MASK_RGBA; if(!test_one(verbose, fp, &blend, mode, *type)) success = FALSE; @@ -865,14 +866,14 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) type = &blend_types[rand() % num_types]; memset(&blend, 0, sizeof blend); - blend.blend_enable = 1; - blend.rgb_func = *rgb_func; - blend.rgb_src_factor = *rgb_src_factor; - blend.rgb_dst_factor = *rgb_dst_factor; - blend.alpha_func = *alpha_func; - blend.alpha_src_factor = *alpha_src_factor; - blend.alpha_dst_factor = *alpha_dst_factor; - blend.colormask = PIPE_MASK_RGBA; + blend.rt[0].blend_enable = 1; + blend.rt[0].rgb_func = *rgb_func; + blend.rt[0].rgb_src_factor = *rgb_src_factor; + blend.rt[0].rgb_dst_factor = *rgb_dst_factor; + blend.rt[0].alpha_func = 
*alpha_func; + blend.rt[0].alpha_src_factor = *alpha_src_factor; + blend.rt[0].alpha_dst_factor = *alpha_dst_factor; + blend.rt[0].colormask = PIPE_MASK_RGBA; if(!test_one(verbose, fp, &blend, mode, *type)) success = FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index faddfb96779..958cc40538e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -34,10 +34,10 @@ */ -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_conv.h" -#include "lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_debug.h" #include "lp_test.h" @@ -142,7 +142,7 @@ add_conv_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -230,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 23ea9ebbe7d..48828bd0a0f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -38,7 +38,7 @@ #include "util/u_cpu_detect.h" #include "util/u_format.h" -#include "lp_bld_format.h" +#include "gallivm/lp_bld_format.h" #include "lp_test.h" @@ -199,7 +199,7 @@ add_store_rgba_test(LLVMModuleRef module, 
} -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 314544aa9a6..14ff00469b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -36,8 +36,8 @@ #include "util/u_cpu_detect.h" -#include "lp_bld_const.h" -#include "lp_bld_misc.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_misc.h" #include "lp_test.h" diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c deleted file mode 100644 index a6d9a2c1acf..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ /dev/null @@ -1,304 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture tile caching. - * - * Author: - * Brian Paul - */ - -#include "pipe/p_inlines.h" -#include "util/u_memory.h" -#include "util/u_tile.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "lp_context.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_cache.h" - - - -/** - * Return the position in the cache for the tile that contains win pos (x,y). - * We currently use a direct mapped cache so this is like a hack key. - * At some point we should investige something more sophisticated, like - * a LRU replacement policy. - */ -#define CACHE_POS(x, y) \ - (((x) + (y) * 5) % NUM_ENTRIES) - - - -/** - * Is the tile at (x,y) in cleared state? - */ -static INLINE uint -is_clear_flag_set(const uint *bitvec, union tex_tile_address addr) -{ - int pos, bit; - pos = addr.bits.y * (MAX_TEX_WIDTH / TEX_TILE_SIZE) + addr.bits.x; - assert(pos / 32 < (MAX_TEX_WIDTH / TEX_TILE_SIZE) * (MAX_TEX_HEIGHT / TEX_TILE_SIZE) / 32); - bit = bitvec[pos / 32] & (1 << (pos & 31)); - return bit; -} - - -/** - * Mark the tile at (x,y) as not cleared. 
- */ -static INLINE void -clear_clear_flag(uint *bitvec, union tex_tile_address addr) -{ - int pos; - pos = addr.bits.y * (MAX_TEX_WIDTH / TEX_TILE_SIZE) + addr.bits.x; - assert(pos / 32 < (MAX_TEX_WIDTH / TEX_TILE_SIZE) * (MAX_TEX_HEIGHT / TEX_TILE_SIZE) / 32); - bitvec[pos / 32] &= ~(1 << (pos & 31)); -} - - -struct llvmpipe_tex_tile_cache * -lp_create_tex_tile_cache( struct pipe_screen *screen ) -{ - struct llvmpipe_tex_tile_cache *tc; - uint pos; - - tc = CALLOC_STRUCT( llvmpipe_tex_tile_cache ); - if (tc) { - tc->screen = screen; - for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].addr.bits.invalid = 1; - } - tc->last_tile = &tc->entries[0]; /* any tile */ - } - return tc; -} - - -void -lp_destroy_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc) -{ - struct pipe_screen *screen; - uint pos; - - for (pos = 0; pos < NUM_ENTRIES; pos++) { - /*assert(tc->entries[pos].x < 0);*/ - } - if (tc->transfer) { - screen = tc->transfer->texture->screen; - screen->tex_transfer_destroy(tc->transfer); - } - if (tc->tex_trans) { - screen = tc->tex_trans->texture->screen; - screen->tex_transfer_destroy(tc->tex_trans); - } - - FREE( tc ); -} - - -void -lp_tex_tile_cache_map_transfers(struct llvmpipe_tex_tile_cache *tc) -{ - if (tc->transfer && !tc->transfer_map) - tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); - - if (tc->tex_trans && !tc->tex_trans_map) - tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); -} - - -void -lp_tex_tile_cache_unmap_transfers(struct llvmpipe_tex_tile_cache *tc) -{ - if (tc->transfer_map) { - tc->screen->transfer_unmap(tc->screen, tc->transfer); - tc->transfer_map = NULL; - } - - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } -} - -void -lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) -{ - if (tc->texture) { - struct llvmpipe_texture *lpt = llvmpipe_texture(tc->texture); - if (lpt->timestamp != 
tc->timestamp) { - /* texture was modified, invalidate all cached tiles */ - uint i; - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].addr.bits.invalid = 1; - } - - tc->timestamp = lpt->timestamp; - } - } -} - -/** - * Specify the texture to cache. - */ -void -lp_tex_tile_cache_set_texture(struct llvmpipe_tex_tile_cache *tc, - struct pipe_texture *texture) -{ - uint i; - - assert(!tc->transfer); - - if (tc->texture != texture) { - pipe_texture_reference(&tc->texture, texture); - - if (tc->tex_trans) { - struct pipe_screen *screen = tc->tex_trans->texture->screen; - - if (tc->tex_trans_map) { - screen->transfer_unmap(screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - /* mark as entries as invalid/empty */ - /* XXX we should try to avoid this when the teximage hasn't changed */ - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].addr.bits.invalid = 1; - } - - tc->tex_face = -1; /* any invalid value here */ - } -} - - -/** - * Given the texture face, level, zslice, x and y values, compute - * the cache entry position/index where we'd hope to find the - * cached texture tile. - * This is basically a direct-map cache. - * XXX There's probably lots of ways in which we can improve this. - */ -static INLINE uint -tex_cache_pos( union tex_tile_address addr ) -{ - uint entry = (addr.bits.x + - addr.bits.y * 9 + - addr.bits.z * 3 + - addr.bits.face + - addr.bits.level * 7); - - return entry % NUM_ENTRIES; -} - -/** - * Similar to lp_get_cached_tile() but for textures. - * Tiles are read-only and indexed with more params. - */ -const struct llvmpipe_cached_tex_tile * -lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, - union tex_tile_address addr ) -{ - struct pipe_screen *screen = tc->screen; - struct llvmpipe_cached_tex_tile *tile; - - tile = tc->entries + tex_cache_pos( addr ); - - if (addr.value != tile->addr.value) { - - /* cache miss. 
Most misses are because we've invaldiated the - * texture cache previously -- most commonly on binding a new - * texture. Currently we effectively flush the cache on texture - * bind. - */ -#if 0 - _debug_printf("miss at %u: x=%d y=%d z=%d face=%d level=%d\n" - " tile %u: x=%d y=%d z=%d face=%d level=%d\n", - pos, x/TEX_TILE_SIZE, y/TEX_TILE_SIZE, z, face, level, - pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level); -#endif - - /* check if we need to get a new transfer */ - if (!tc->tex_trans || - tc->tex_face != addr.bits.face || - tc->tex_level != addr.bits.level || - tc->tex_z != addr.bits.z) { - /* get new transfer (view into texture) */ - - if (tc->tex_trans) { - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - tc->tex_trans = - screen->get_tex_transfer(screen, tc->texture, - addr.bits.face, - addr.bits.level, - addr.bits.z, - PIPE_TRANSFER_READ, 0, 0, - u_minify(tc->texture->width0, addr.bits.level), - u_minify(tc->texture->height0, addr.bits.level)); - - tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); - - tc->tex_face = addr.bits.face; - tc->tex_level = addr.bits.level; - tc->tex_z = addr.bits.z; - } - - { - unsigned x = addr.bits.x * TEX_TILE_SIZE; - unsigned y = addr.bits.y * TEX_TILE_SIZE; - unsigned w = TEX_TILE_SIZE; - unsigned h = TEX_TILE_SIZE; - - if (pipe_clip_tile(x, y, &w, &h, tc->tex_trans)) { - assert(0); - } - - util_format_read_4ub(tc->tex_trans->texture->format, - (uint8_t *)tile->color, sizeof tile->color[0], - tc->tex_trans_map, tc->tex_trans->stride, - x, y, w, h); - } - - tile->addr = addr; - } - - tc->last_tile = tile; - return tile; -} diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h deleted file mode 100644 index 05fded78e16..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h +++ 
/dev/null @@ -1,151 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef LP_TEX_CACHE_H -#define LP_TEX_CACHE_H - - -#include "pipe/p_compiler.h" - - -struct llvmpipe_context; -struct llvmpipe_tex_tile_cache; - - -/** - * Cache tile size (width and height). This needs to be a power of two. - */ -#define TEX_TILE_SIZE 64 - - -/* If we need to support > 4096, just expand this to be a 64 bit - * union, or consider tiling in Z as well. 
- */ -union tex_tile_address { - struct { - unsigned x:6; /* 4096 / TEX_TILE_SIZE */ - unsigned y:6; /* 4096 / TEX_TILE_SIZE */ - unsigned z:12; /* 4096 -- z not tiled */ - unsigned face:3; - unsigned level:4; - unsigned invalid:1; - } bits; - unsigned value; -}; - - -struct llvmpipe_cached_tex_tile -{ - union tex_tile_address addr; - uint8_t color[TEX_TILE_SIZE][TEX_TILE_SIZE][4]; -}; - -#define NUM_ENTRIES 50 - - -/** XXX move these */ -#define MAX_TEX_WIDTH 2048 -#define MAX_TEX_HEIGHT 2048 - - -struct llvmpipe_tex_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct pipe_texture *texture; /**< if caching a texture */ - unsigned timestamp; - - struct llvmpipe_cached_tex_tile entries[NUM_ENTRIES]; - - struct pipe_transfer *tex_trans; - void *tex_trans_map; - int tex_face, tex_level, tex_z; - - struct llvmpipe_cached_tex_tile *last_tile; /**< most recently retrieved tile */ -}; - - -extern struct llvmpipe_tex_tile_cache * -lp_create_tex_tile_cache( struct pipe_screen *screen ); - -extern void -lp_destroy_tex_tile_cache(struct llvmpipe_tex_tile_cache *tc); - -extern void -lp_tex_tile_cache_map_transfers(struct llvmpipe_tex_tile_cache *tc); - -extern void -lp_tex_tile_cache_unmap_transfers(struct llvmpipe_tex_tile_cache *tc); - -extern void -lp_tex_tile_cache_set_texture(struct llvmpipe_tex_tile_cache *tc, - struct pipe_texture *texture); - -void -lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc); - -extern const struct llvmpipe_cached_tex_tile * -lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, - union tex_tile_address addr ); - -static INLINE union tex_tile_address -tex_tile_address( unsigned x, - unsigned y, - unsigned z, - unsigned face, - unsigned level ) -{ - union tex_tile_address addr; - - addr.value = 0; - addr.bits.x = x / TEX_TILE_SIZE; - addr.bits.y = y / TEX_TILE_SIZE; - addr.bits.z = z; - 
addr.bits.face = face; - addr.bits.level = level; - - return addr; -} - -/* Quickly retrieve tile if it matches last lookup. - */ -static INLINE const struct llvmpipe_cached_tex_tile * -lp_get_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, - union tex_tile_address addr ) -{ - if (tc->last_tile->addr.value == addr.value) - return tc->last_tile; - - return lp_find_cached_tex_tile( tc, addr ); -} - - -#endif /* LP_TEX_CACHE_H */ - diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index d2a6ae21f57..2533275dc18 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -42,12 +42,11 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "lp_bld_debug.h" -#include "lp_bld_type.h" -#include "lp_bld_intr.h" -#include "lp_bld_sample.h" -#include "lp_bld_tgsi.h" -#include "lp_state.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_sample.h" +#include "gallivm/lp_bld_tgsi.h" +#include "lp_jit.h" #include "lp_tex_sample.h" diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 2c135029ea2..022bf92cb46 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -32,45 +32,42 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_context.h" -#include "lp_state.h" -#include "lp_texture.h" -#include "lp_tex_cache.h" #include "lp_screen.h" +#include "lp_texture.h" +#include "lp_tile_size.h" #include "lp_winsys.h" -/* Simple, maximally packed layout. 
- */ - -/* Conventional allocation path for non-display textures: +/** + * Conventional allocation path for non-display textures: + * Simple, maximally packed layout. */ static boolean llvmpipe_texture_layout(struct llvmpipe_screen *screen, - struct llvmpipe_texture * lpt) + struct llvmpipe_texture *lpt) { struct pipe_texture *pt = &lpt->base; unsigned level; unsigned width = pt->width0; unsigned height = pt->height0; unsigned depth = pt->depth0; - unsigned buffer_size = 0; for (level = 0; level <= pt->last_level; level++) { unsigned nblocksx, nblocksy; /* Allocate storage for whole quads. This is particularly important - * for depth surfaces, which are currently stored in a swizzled format. */ - nblocksx = util_format_get_nblocksx(pt->format, align(width, 2)); - nblocksy = util_format_get_nblocksy(pt->format, align(height, 2)); + * for depth surfaces, which are currently stored in a swizzled format. + */ + nblocksx = util_format_get_nblocksx(pt->format, align(width, TILE_SIZE)); + nblocksy = util_format_get_nblocksy(pt->format, align(height, TILE_SIZE)); lpt->stride[level] = align(nblocksx * util_format_get_blocksize(pt->format), 16); @@ -80,7 +77,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * lpt->stride[level]); - width = u_minify(width, 1); + width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); } @@ -90,16 +87,23 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, return lpt->data != NULL; } + + static boolean llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, - struct llvmpipe_texture * lpt) + struct llvmpipe_texture *lpt) { struct llvmpipe_winsys *winsys = screen->winsys; + /* Round up the surface size to a multiple of the tile size to + * avoid tile clipping. 
+ */ + unsigned width = align(lpt->base.width0, TILE_SIZE); + unsigned height = align(lpt->base.height0, TILE_SIZE); + lpt->dt = winsys->displaytarget_create(winsys, lpt->base.format, - lpt->base.width0, - lpt->base.height0, + width, height, 16, &lpt->stride[0] ); @@ -107,9 +111,6 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, } - - - static struct pipe_texture * llvmpipe_texture_create(struct pipe_screen *_screen, const struct pipe_texture *templat) @@ -126,7 +127,7 @@ llvmpipe_texture_create(struct pipe_screen *_screen, /* XXX: The xlib state tracker is brain-dead and will request * PIPE_FORMAT_Z16_UNORM no matter how much we tell it we don't support it. */ - if(lpt->base.format == PIPE_FORMAT_Z16_UNORM) + if (lpt->base.format == PIPE_FORMAT_Z16_UNORM) lpt->base.format = PIPE_FORMAT_Z32_UNORM; if (lpt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | @@ -178,6 +179,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, return &lpt->base; #else + debug_printf("llvmpipe_texture_blanket() not implemented!"); return NULL; #endif } @@ -189,12 +191,15 @@ llvmpipe_texture_destroy(struct pipe_texture *pt) struct llvmpipe_screen *screen = llvmpipe_screen(pt->screen); struct llvmpipe_texture *lpt = llvmpipe_texture(pt); - if(lpt->dt) { + if (lpt->dt) { + /* display target */ struct llvmpipe_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, lpt->dt); } - else + else { + /* regular texture */ align_free(lpt->data); + } FREE(lpt); } @@ -236,7 +241,7 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE)) { - /* Mark the surface as dirty. The tile cache will look for this. */ + /* Mark the surface as dirty. 
*/ lpt->timestamp++; llvmpipe_screen(screen)->timestamp++; } @@ -298,8 +303,8 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, pipe_texture_reference(&pt->texture, texture); pt->x = x; pt->y = y; - pt->width = w; - pt->height = h; + pt->width = align(w, TILE_SIZE); + pt->height = align(h, TILE_SIZE); pt->stride = lptex->stride[level]; pt->usage = usage; pt->face = face; @@ -356,7 +361,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, lpt = llvmpipe_texture(transfer->texture); format = lpt->base.format; - if(lpt->dt) { + if (lpt->dt) { + /* display target */ struct llvmpipe_winsys *winsys = screen->winsys; map = winsys->displaytarget_map(winsys, lpt->dt, @@ -364,16 +370,16 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, if (map == NULL) return NULL; } - else + else { + /* regular texture */ map = lpt->data; + } /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) - { + if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. - * In llvmpipe, that would mean flushing the texture cache. 
*/ screen->timestamp++; } @@ -387,29 +393,24 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, static void -llvmpipe_transfer_unmap(struct pipe_screen *_screen, +llvmpipe_transfer_unmap(struct pipe_screen *screen, struct pipe_transfer *transfer) { - struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + struct llvmpipe_screen *lp_screen = llvmpipe_screen(screen); struct llvmpipe_texture *lpt; assert(transfer->texture); lpt = llvmpipe_texture(transfer->texture); - if(lpt->dt) { - struct llvmpipe_winsys *winsys = screen->winsys; + if (lpt->dt) { + /* display target */ + struct llvmpipe_winsys *winsys = lp_screen->winsys; winsys->displaytarget_unmap(winsys, lpt->dt); } } void -llvmpipe_init_texture_funcs(struct llvmpipe_context *lp) -{ -} - - -void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = llvmpipe_texture_create; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 00a20763e43..87c905bc027 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -37,6 +37,7 @@ struct pipe_screen; struct llvmpipe_context; struct llvmpipe_displaytarget; + struct llvmpipe_texture { struct pipe_texture base; @@ -58,6 +59,7 @@ struct llvmpipe_texture unsigned timestamp; }; + struct llvmpipe_transfer { struct pipe_transfer base; @@ -73,6 +75,14 @@ llvmpipe_texture(struct pipe_texture *pt) return (struct llvmpipe_texture *) pt; } + +static INLINE const struct llvmpipe_texture * +llvmpipe_texture_const(const struct pipe_texture *pt) +{ + return (const struct llvmpipe_texture *) pt; +} + + static INLINE struct llvmpipe_transfer * llvmpipe_transfer(struct pipe_transfer *pt) { @@ -81,10 +91,7 @@ llvmpipe_transfer(struct pipe_transfer *pt) extern void -llvmpipe_init_texture_funcs( struct llvmpipe_context *llvmpipe ); - -extern void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen); -#endif /* LP_TEXTURE */ +#endif /* 
LP_TEXTURE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c deleted file mode 100644 index 7a1ecf5107b..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ /dev/null @@ -1,358 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture tile caching. 
- * - * Author: - * Brian Paul - */ - -#include "pipe/p_inlines.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_tile.h" -#include "util/u_rect.h" -#include "lp_context.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tile_soa.h" -#include "lp_tile_cache.h" - - -#define MAX_WIDTH 4096 -#define MAX_HEIGHT 4096 - - -enum llvmpipe_tile_status -{ - LP_TILE_STATUS_UNDEFINED = 0, - LP_TILE_STATUS_CLEAR = 1, - LP_TILE_STATUS_DEFINED = 2 -}; - - -struct llvmpipe_cached_tile -{ - enum llvmpipe_tile_status status; - - /** color in SOA format */ - uint8_t *color; -}; - - -struct llvmpipe_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; - - uint8_t clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ - - struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ -}; - - -struct llvmpipe_tile_cache * -lp_create_tile_cache( struct pipe_screen *screen ) -{ - struct llvmpipe_tile_cache *tc; - int maxLevels, maxTexSize; - - /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ - maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); - maxTexSize = 1 << (maxLevels - 1); - assert(MAX_WIDTH >= maxTexSize); - - tc = CALLOC_STRUCT( llvmpipe_tile_cache ); - if(!tc) - return NULL; - - tc->screen = screen; - - return tc; -} - - -void -lp_destroy_tile_cache(struct llvmpipe_tile_cache *tc) -{ - struct pipe_screen *screen; - unsigned x, y; - - for (y = 0; y < MAX_HEIGHT; y += TILE_SIZE) { - for (x = 0; x < MAX_WIDTH; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - if(tile->color) - align_free(tile->color); - } - } - - if (tc->transfer) { - screen = tc->transfer->texture->screen; - 
screen->tex_transfer_destroy(tc->transfer); - } - - FREE( tc ); -} - - -/** - * Specify the surface to cache. - */ -void -lp_tile_cache_set_surface(struct llvmpipe_tile_cache *tc, - struct pipe_surface *ps) -{ - if (tc->transfer) { - struct pipe_screen *screen = tc->transfer->texture->screen; - - if (ps == tc->surface) - return; - - if (tc->transfer_map) { - screen->transfer_unmap(screen, tc->transfer); - tc->transfer_map = NULL; - } - - screen->tex_transfer_destroy(tc->transfer); - tc->transfer = NULL; - } - - tc->surface = ps; - - if (ps) { - struct pipe_screen *screen = ps->texture->screen; - unsigned x, y; - - tc->transfer = screen->get_tex_transfer(screen, ps->texture, ps->face, - ps->level, ps->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, ps->width, ps->height); - - for (y = 0; y < ps->height; y += TILE_SIZE) { - for (x = 0; x < ps->width; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - tile->status = LP_TILE_STATUS_UNDEFINED; - - if(!tile->color) - tile->color = align_malloc( TILE_SIZE*TILE_SIZE*NUM_CHANNELS, 16 ); - } - } - } -} - - -/** - * Return the transfer being cached. - */ -struct pipe_surface * -lp_tile_cache_get_surface(struct llvmpipe_tile_cache *tc) -{ - return tc->surface; -} - - -void -lp_tile_cache_map_transfers(struct llvmpipe_tile_cache *tc) -{ - if (tc->transfer && !tc->transfer_map) - tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); -} - - -void -lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc) -{ - if (tc->transfer_map) { - tc->screen->transfer_unmap(tc->screen, tc->transfer); - tc->transfer_map = NULL; - } -} - - -/** - * Set a tile to a solid color. 
- */ -static void -clear_tile(struct llvmpipe_cached_tile *tile, - uint8_t clear_color[4]) -{ - if (clear_color[0] == clear_color[1] && - clear_color[1] == clear_color[2] && - clear_color[2] == clear_color[3]) { - memset(tile->color, clear_color[0], TILE_SIZE * TILE_SIZE * 4); - } - else { - uint x, y, chan; - for (y = 0; y < TILE_SIZE; y++) - for (x = 0; x < TILE_SIZE; x++) - for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(tile->color, x, y, chan) = clear_color[chan]; - } -} - - -/** - * Flush the tile cache: write all dirty tiles back to the transfer. - * any tiles "flagged" as cleared will be "really" cleared. - */ -void -lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) -{ - struct pipe_transfer *pt = tc->transfer; - unsigned x, y; - - if(!pt) - return; - - assert(tc->transfer_map); - - /* push the tile to all positions marked as clear */ - for (y = 0; y < pt->height; y += TILE_SIZE) { - for (x = 0; x < pt->width; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - if(tile->status != LP_TILE_STATUS_UNDEFINED) { - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - if (!pipe_clip_tile(x, y, &w, &h, pt)) { - switch(tile->status) { - case LP_TILE_STATUS_CLEAR: - /* Actually clear the tiles which were flagged as being in a - * clear state. */ - util_fill_rect(tc->transfer_map, pt->texture->format, pt->stride, - x, y, w, h, - tc->clear_val); - break; - - case LP_TILE_STATUS_DEFINED: - lp_tile_write_4ub(pt->texture->format, - tile->color, - tc->transfer_map, pt->stride, - x, y, w, h); - break; - - default: - assert(0); - break; - } - } - - tile->status = LP_TILE_STATUS_UNDEFINED; - } - } - } -} - - -/** - * Get a tile from the cache. 
- * \param x, y position of tile, in pixels - */ -void * -lp_get_cached_tile(struct llvmpipe_tile_cache *tc, - unsigned x, unsigned y ) -{ - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - struct pipe_transfer *pt = tc->transfer; - - assert(tc->surface); - assert(tc->transfer); - - if(!tc->transfer_map) - lp_tile_cache_map_transfers(tc); - - assert(tc->transfer_map); - - switch(tile->status) { - case LP_TILE_STATUS_CLEAR: - /* don't get tile from framebuffer, just clear it */ - clear_tile(tile, tc->clear_color); - tile->status = LP_TILE_STATUS_DEFINED; - break; - - case LP_TILE_STATUS_UNDEFINED: { - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - x &= ~(TILE_SIZE - 1); - y &= ~(TILE_SIZE - 1); - - if (!pipe_clip_tile(x, y, &w, &h, tc->transfer)) - lp_tile_read_4ub(pt->texture->format, - tile->color, - tc->transfer_map, tc->transfer->stride, - x, y, w, h); - - tile->status = LP_TILE_STATUS_DEFINED; - break; - } - - case LP_TILE_STATUS_DEFINED: - /* nothing to do */ - break; - } - - return tile->color; -} - - -/** - * When a whole surface is being cleared to a value we can avoid - * fetching tiles above. - * Save the color and set a 'clearflag' for each tile of the screen. 
- */ -void -lp_tile_cache_clear(struct llvmpipe_tile_cache *tc, const float *rgba, - uint clearValue) -{ - struct pipe_transfer *pt = tc->transfer; - const unsigned w = pt->width; - const unsigned h = pt->height; - unsigned x, y, chan; - - for(chan = 0; chan < 4; ++chan) - tc->clear_color[chan] = float_to_ubyte(rgba[chan]); - - tc->clear_val = clearValue; - - /* push the tile to all positions marked as clear */ - for (y = 0; y < h; y += TILE_SIZE) { - for (x = 0; x < w; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - tile->status = LP_TILE_STATUS_CLEAR; - } - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_tile_size.h b/src/gallium/drivers/llvmpipe/lp_tile_size.h new file mode 100644 index 00000000000..f0b983c0632 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_size.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_TILE_SIZE_H +#define LP_TILE_SIZE_H + + +/** + * Tile size (width and height). This needs to be a power of two. + */ +#define TILE_ORDER 6 +#define TILE_SIZE (1 << TILE_ORDER) + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 19d00b58d37..eea3ab84990 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -30,7 +30,7 @@ #include "pipe/p_compiler.h" #include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ - +#include "lp_tile_size.h" #ifdef __cplusplus extern "C" { @@ -40,26 +40,20 @@ extern "C" { struct pipe_transfer; -/** - * Cache tile size (width and height). This needs to be a power of two. 
- */ -#define TILE_SIZE 64 - - -#define TILE_VECTOR_HEIGHT 2 -#define TILE_VECTOR_WIDTH 8 +#define TILE_VECTOR_HEIGHT 4 +#define TILE_VECTOR_WIDTH 4 extern const unsigned char tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; -#define TILE_C_STRIDE (TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH) -#define TILE_X_STRIDE (NUM_CHANNELS*TILE_C_STRIDE) -#define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT*TILE_SIZE*NUM_CHANNELS) +#define TILE_C_STRIDE (TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH) //16 +#define TILE_X_STRIDE (NUM_CHANNELS * TILE_C_STRIDE) //64 +#define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) //1024 #define TILE_PIXEL(_p, _x, _y, _c) \ - ((_p)[((_y)/TILE_VECTOR_HEIGHT)*TILE_Y_STRIDE + \ - ((_x)/TILE_VECTOR_WIDTH)*TILE_X_STRIDE + \ - (_c)*TILE_C_STRIDE + \ + ((_p)[((_y) / TILE_VECTOR_HEIGHT) * TILE_Y_STRIDE + \ + ((_x) / TILE_VECTOR_WIDTH) * TILE_X_STRIDE + \ + (_c) * TILE_C_STRIDE + \ tile_offset[(_y) % TILE_VECTOR_HEIGHT][(_x) % TILE_VECTOR_WIDTH]]) diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 004c5c979e3..5d53689a3db 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -129,22 +129,8 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): print -def generate_format_write(format, src_type, src_native_type, src_suffix): - '''Generate the function to write pixels to a particular format''' - - name = short_name(format) - - dst_native_type = native_type(format) - - print 'static void' - print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) - print '{' - print ' unsigned x, y;' - print ' uint8_t *dst_row = dst + y0*dst_stride;' - print ' for (y = 0; y < h; ++y) {' - print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) - print ' for (x = 0; x < w; ++x) {' - +def 
compute_inverse_swizzle(format): + '''Return an array[4] of inverse swizzle terms''' inv_swizzle = [None]*4 if format.colorspace == 'rgb': for i in range(4): @@ -155,8 +141,86 @@ def generate_format_write(format, src_type, src_native_type, src_suffix): swizzle = format.out_swizzle[0] if swizzle < 4: inv_swizzle[swizzle] = 0 - else: - assert False + return inv_swizzle + + +def pack_rgba(format, src_type, r, g, b, a): + """Return an expression for packing r, g, b, a into a pixel of the + given format. Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)' + """ + assert format.colorspace == 'rgb' + inv_swizzle = compute_inverse_swizzle(format) + shift = 0 + expr = None + for i in range(4): + # choose r, g, b, or a depending on the inverse swizzle term + if inv_swizzle[i] == 0: + value = r + elif inv_swizzle[i] == 1: + value = g + elif inv_swizzle[i] == 2: + value = b + elif inv_swizzle[i] == 3: + value = a + else: + value = None + + if value: + dst_type = format.in_types[i] + dst_native_type = native_type(format) + value = conversion_expr(src_type, dst_type, dst_native_type, value) + term = "((%s) << %d)" % (value, shift) + if expr: + expr = expr + " | " + term + else: + expr = term + + width = format.in_types[i].size + shift = shift + width + return expr + + +def emit_unrolled_write_code(format, src_type): + '''Emit code for writing a block based on unrolled loops. + This is considerably faster than the TILE_PIXEL-based code below. 
+ ''' + dst_native_type = native_type(format) + print ' const unsigned dstpix_stride = dst_stride / %d;' % format.stride() + print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type) + print ' unsigned int qx, qy, i;' + print + print ' for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {' + print ' const unsigned py = y0 + qy;' + print ' for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {' + print ' const unsigned px = x0 + qx;' + print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;' + print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;' + print ' const uint8_t *b = src + 2 * TILE_C_STRIDE;' + print ' const uint8_t *a = src + 3 * TILE_C_STRIDE;' + print ' (void) r; (void) g; (void) b; (void) a; /* silence warnings */' + print ' for (i = 0; i < TILE_C_STRIDE; i += 2) {' + print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_type, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") + print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_type, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") + print ' const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);' + print ' dstpix[offset + 0] = pixel0;' + print ' dstpix[offset + 1] = pixel1;' + print ' }' + print ' src += TILE_X_STRIDE;' + print ' }' + print ' }' + + +def emit_tile_pixel_write_code(format, src_type): + '''Emit code for writing a block based on the TILE_PIXEL macro.''' + dst_native_type = native_type(format) + + inv_swizzle = compute_inverse_swizzle(format) + + print ' unsigned x, y;' + print ' uint8_t *dst_row = dst + y0*dst_stride;' + print ' for (y = 0; y < h; ++y) {' + print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) + print ' for (x = 0; x < w; ++x) {' if format.layout == ARITH: print ' %s pixel = 0;' % dst_native_type @@ -185,6 +249,20 @@ def generate_format_write(format, src_type, src_native_type, src_suffix): print ' }' print ' dst_row += dst_stride;' print ' }' + + +def generate_format_write(format, src_type, 
src_native_type, src_suffix): + '''Generate the function to write pixels to a particular format''' + + name = short_name(format) + + print 'static void' + print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) + print '{' + if format.layout == ARITH and format.colorspace == 'rgb': + emit_unrolled_write_code(format, src_type) + else: + emit_tile_pixel_write_code(format, src_type) print '}' print @@ -259,8 +337,23 @@ def main(): print print 'const unsigned char' print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' - print ' { 0, 1, 4, 5, 8, 9, 12, 13},' - print ' { 2, 3, 6, 7, 10, 11, 14, 15}' + print ' { 0, 1, 4, 5},' + print ' { 2, 3, 6, 7},' + print ' { 8, 9, 12, 13},' + print ' { 10, 11, 14, 15}' + print '};' + print + print '/* Note: these lookup tables could be replaced with some' + print ' * bit-twiddling code, but this is a little faster.' + print ' */' + print 'static unsigned tile_x_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {' + print ' 0, 1, 0, 1, 2, 3, 2, 3,' + print ' 0, 1, 0, 1, 2, 3, 2, 3' + print '};' + print + print 'static unsigned tile_y_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {' + print ' 0, 0, 1, 1, 0, 0, 1, 1,' + print ' 2, 2, 3, 3, 2, 2, 3, 3' print '};' print diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h index 74b472b6531..ce11fa93041 100644 --- a/src/gallium/drivers/llvmpipe/lp_winsys.h +++ b/src/gallium/drivers/llvmpipe/lp_winsys.h @@ -113,9 +113,6 @@ struct llvmpipe_winsys }; -struct pipe_context * -llvmpipe_create( struct pipe_screen * ); - struct pipe_screen * llvmpipe_create_screen( struct llvmpipe_winsys * ); diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index 0cb66041d50..0e02680bc63 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -4,6 +4,7 @@ include 
$(TOP)/configs/current LIBNAME = nouveau C_SOURCES = nouveau_screen.c \ - nouveau_context.c + nouveau_context.c \ + nv04_surface_2d.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 7ebc94ed6c7..156cb2d229b 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -3,7 +3,9 @@ #include <pipe/p_state.h> #include <util/u_memory.h> +#include <util/u_inlines.h> +#include <stdio.h> #include <errno.h> #include "nouveau/nouveau_bo.h" @@ -260,6 +262,8 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) void nouveau_screen_fini(struct nouveau_screen *screen) { + struct pipe_winsys *ws = screen->base.winsys; nouveau_channel_free(&screen->channel); + ws->destroy(ws); } diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 4c3e08a43f5..af9ddd558c8 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -2,7 +2,7 @@ #define NOUVEAU_WINSYS_H #include <stdint.h> -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_defines.h" #include "nouveau/nouveau_bo.h" @@ -27,39 +27,12 @@ #define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19) extern struct pipe_screen * -nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *); - -extern struct pipe_context * -nv04_create(struct pipe_screen *, unsigned pctx_id); - -extern struct pipe_screen * -nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *); - -extern struct pipe_context * -nv10_create(struct pipe_screen *, unsigned pctx_id); - -extern struct pipe_screen * -nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *); - -extern struct pipe_context * -nv20_create(struct pipe_screen *, unsigned pctx_id); - -extern struct pipe_screen * nv30_screen_create(struct pipe_winsys *ws, struct 
nouveau_device *); -extern struct pipe_context * -nv30_create(struct pipe_screen *, unsigned pctx_id); - extern struct pipe_screen * nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *); -extern struct pipe_context * -nv40_create(struct pipe_screen *, unsigned pctx_id); - extern struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *); -extern struct pipe_context * -nv50_create(struct pipe_screen *, unsigned pctx_id); - #endif diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nouveau/nv04_surface_2d.c index b24a9cee5ae..42c2ca932d8 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nouveau/nv04_surface_2d.c @@ -60,17 +60,17 @@ nv04_scaled_image_format(enum pipe_format format) case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_I8_UNORM: - return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8; + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8; case PIPE_FORMAT_A1R5G5B5_UNORM: - return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; case PIPE_FORMAT_A8R8G8B8_UNORM: - return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8; + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8; case PIPE_FORMAT_X8R8G8B8_UNORM: - return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_R16_SNORM: case PIPE_FORMAT_A8L8_UNORM: - return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; default: return -1; } @@ -155,7 +155,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); - BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); + BEGIN_RING(chan, sifm, 
NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); OUT_RELOCo(chan, src_bo, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); @@ -173,22 +173,22 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); - OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); + BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); + OUT_RING (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); OUT_RING (chan, nv04_scaled_image_format(src->format)); - OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); - OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); - OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); - OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); + OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); + OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); + OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); + OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); + OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 1 << 20); OUT_RING (chan, 1 << 20); - BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); - OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w); + BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); + OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w); OUT_RING (chan, src_pitch | - NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | - 
NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); + NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | + NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format), NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); OUT_RING (chan, 0); @@ -421,12 +421,12 @@ nv04_surface_2d_init(struct nouveau_screen *screen) return NULL; } - BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1); + BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1); OUT_RING (chan, ctx->ntfy->handle); BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1); OUT_RING (chan, ctx->surf2d->handle); - BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_OPERATION, 1); - OUT_RING (chan, NV04_IMAGE_BLIT_OPERATION_SRCCOPY); + BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1); + OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY); ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT, &ctx->rect); diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nouveau/nv04_surface_2d.h index ce696a11a39..ce696a11a39 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.h +++ b/src/gallium/drivers/nouveau/nv04_surface_2d.h diff --git a/src/gallium/drivers/nv04/Makefile b/src/gallium/drivers/nv04/Makefile deleted file mode 100644 index 7c14bacb1de..00000000000 --- a/src/gallium/drivers/nv04/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = nv04 - -C_SOURCES = \ - nv04_surface_2d.c \ - nv04_clear.c \ - nv04_context.c \ - nv04_fragprog.c \ - nv04_fragtex.c \ - nv04_miptree.c \ - nv04_prim_vbuf.c \ - nv04_screen.c \ - nv04_state.c \ - nv04_state_emit.c \ - nv04_surface.c \ - nv04_transfer.c \ - nv04_vbo.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/nv04/nv04_clear.c b/src/gallium/drivers/nv04/nv04_clear.c deleted file mode 100644 index 01cacd36fe1..00000000000 --- a/src/gallium/drivers/nv04/nv04_clear.c +++ /dev/null @@ -1,12 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "nv04_context.h" - -void -nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue) -{ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); -} diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c deleted file mode 100644 index edd96859cf8..00000000000 --- a/src/gallium/drivers/nv04/nv04_context.c +++ /dev/null @@ -1,112 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" - -#include "nv04_context.h" -#include "nv04_screen.h" - -static void -nv04_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct nv04_context *nv04 = nv04_context(pipe); - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - - draw_flush(nv04->draw); - - FIRE_RING(chan); - if (fence) - *fence = NULL; -} - -static void -nv04_destroy(struct pipe_context *pipe) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - if (nv04->draw) - draw_destroy(nv04->draw); - - FREE(nv04); -} - -static boolean -nv04_init_hwctx(struct nv04_context *nv04) -{ - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - - // 
requires a valid handle -// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); -// OUT_RING(0); - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); - OUT_RING(chan, 0); - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(chan, 0x40182800); -// OUT_RING(1<<20/*no cull*/); - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); -// OUT_RING(0x24|(1<<6)|(1<<8)); - OUT_RING(chan, 0x120001a4); - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); - OUT_RING(chan, 0x332213a1); - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); - OUT_RING(chan, 0x11001010); - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); - OUT_RING(chan, 0x0); -// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); -// OUT_RING(SCREEN_OFFSET); - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); - OUT_RING(chan, 0xff000000); - - - - FIRE_RING (chan); - return TRUE; -} - -struct pipe_context * -nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) -{ - struct nv04_screen *screen = nv04_screen(pscreen); - struct pipe_winsys *ws = pscreen->winsys; - struct nv04_context *nv04; - struct nouveau_winsys *nvws = screen->nvws; - - nv04 = CALLOC(1, sizeof(struct nv04_context)); - if (!nv04) - return NULL; - nv04->screen = screen; - nv04->pctx_id = pctx_id; - - nv04->nvws = nvws; - - nv04->pipe.winsys = ws; - nv04->pipe.screen = pscreen; - nv04->pipe.destroy = nv04_destroy; - nv04->pipe.draw_arrays = nv04_draw_arrays; - nv04->pipe.draw_elements = nv04_draw_elements; - nv04->pipe.clear = nv04_clear; - nv04->pipe.flush = nv04_flush; - - nv04->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nv04->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - - nv04_init_surface_functions(nv04); - nv04_init_state_functions(nv04); - - nv04->draw = draw_create(); - assert(nv04->draw); - draw_wide_point_threshold(nv04->draw, 0.0); - draw_wide_line_threshold(nv04->draw, 0.0); 
- draw_enable_line_stipple(nv04->draw, FALSE); - draw_enable_point_sprites(nv04->draw, FALSE); - draw_set_rasterize_stage(nv04->draw, nv04_draw_vbuf_stage(nv04)); - - nv04_init_hwctx(nv04); - - return &nv04->pipe; -} - diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h deleted file mode 100644 index fe3b527423c..00000000000 --- a/src/gallium/drivers/nv04/nv04_context.h +++ /dev/null @@ -1,148 +0,0 @@ -#ifndef __NV04_CONTEXT_H__ -#define __NV04_CONTEXT_H__ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_compiler.h" - -#include "util/u_memory.h" -#include "util/u_math.h" - -#include "draw/draw_vertex.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_gldefs.h" -#include "nouveau/nouveau_context.h" - -#include "nv04_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -#include "nv04_screen.h" - -#define NV04_NEW_VERTPROG (1 << 1) -#define NV04_NEW_FRAGPROG (1 << 2) -#define NV04_NEW_BLEND (1 << 3) -#define NV04_NEW_RAST (1 << 4) -#define NV04_NEW_CONTROL (1 << 5) -#define NV04_NEW_VIEWPORT (1 << 6) -#define NV04_NEW_SAMPLER (1 << 7) -#define NV04_NEW_FRAMEBUFFER (1 << 8) -#define NV04_NEW_VTXARRAYS (1 << 9) - -struct nv04_context { - struct pipe_context pipe; - - struct nouveau_winsys *nvws; - struct nv04_screen *screen; - unsigned pctx_id; - - struct draw_context *draw; - - int chipset; - struct nouveau_notifier *sync; - - uint32_t dirty; - - struct nv04_blend_state *blend; - struct nv04_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - struct nv04_fragtex_state fragtex; - struct nv04_rasterizer_state *rast; - struct nv04_depth_stencil_alpha_state *dsa; - - struct nv04_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned dirty_samplers; - unsigned fp_samplers; - unsigned vp_samplers; - - uint32_t rt_enable; - struct 
pipe_framebuffer_state *framebuffer; - struct pipe_surface *rt; - struct pipe_surface *zeta; - - struct { - struct pipe_buffer *buffer; - uint32_t format; - } tex[16]; - - unsigned vb_enable; - struct { - struct pipe_buffer *buffer; - unsigned delta; - } vb[16]; - - float *constbuf[PIPE_SHADER_TYPES][32][4]; - unsigned constbuf_nr[PIPE_SHADER_TYPES]; - - struct vertex_info vertex_info; - struct { - - struct nouveau_resource *exec_heap; - struct nouveau_resource *data_heap; - - struct nv04_vertex_program *active; - - struct nv04_vertex_program *current; - struct pipe_buffer *constant_buf; - } vertprog; - - struct { - struct nv04_fragment_program *active; - - struct nv04_fragment_program *current; - struct pipe_buffer *constant_buf; - } fragprog; - - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; - - struct pipe_viewport_state viewport; -}; - -static INLINE struct nv04_context * -nv04_context(struct pipe_context *pipe) -{ - return (struct nv04_context *)pipe; -} - -extern void nv04_init_state_functions(struct nv04_context *nv04); -extern void nv04_init_surface_functions(struct nv04_context *nv04); -extern void nv04_screen_init_miptree_functions(struct pipe_screen *screen); - -/* nv04_clear.c */ -extern void nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue); - -/* nv04_draw.c */ -extern struct draw_stage *nv04_draw_render_stage(struct nv04_context *nv04); - -/* nv04_fragprog.c */ -extern void nv04_fragprog_bind(struct nv04_context *, - struct nv04_fragment_program *); -extern void nv04_fragprog_destroy(struct nv04_context *, - struct nv04_fragment_program *); - -/* nv04_fragtex.c */ -extern void nv04_fragtex_bind(struct nv04_context *); - -/* nv04_prim_vbuf.c */ -struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ); - -/* nv04_state.c and friends */ -extern void nv04_emit_hw_state(struct nv04_context *nv04); -extern void nv04_state_tex_update(struct 
nv04_context *nv04); - -/* nv04_vbo.c */ -extern void nv04_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv04_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count); - - -#endif diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c deleted file mode 100644 index 8a2af41fe06..00000000000 --- a/src/gallium/drivers/nv04/nv04_fragprog.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "nv04_context.h" - -void -nv04_fragprog_bind(struct nv04_context *nv04, struct nv04_fragment_program *fp) -{ -} - -void -nv04_fragprog_destroy(struct nv04_context *nv04, - struct nv04_fragment_program *fp) -{ -} - diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c deleted file mode 100644 index c152b52119a..00000000000 --- a/src/gallium/drivers/nv04/nv04_fragtex.c +++ /dev/null @@ -1,73 +0,0 @@ -#include "nv04_context.h" -#include "nouveau/nouveau_util.h" - -#define _(m,tf) \ -{ \ - PIPE_FORMAT_##m, \ - NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ -} - -struct nv04_texture_format { - uint pipe; - int format; -}; - -static struct nv04_texture_format -nv04_texture_formats[] = { - _(A8R8G8B8_UNORM, A8R8G8B8), - _(X8R8G8B8_UNORM, X8R8G8B8), - _(A1R5G5B5_UNORM, A1R5G5B5), - _(A4R4G4B4_UNORM, A4R4G4B4), - _(L8_UNORM, Y8 ), - _(A8_UNORM, Y8 ), -}; - -static uint32_t -nv04_fragtex_format(uint pipe_format) -{ - struct nv04_texture_format *tf = nv04_texture_formats; - int i; - - for (i=0; i< sizeof(nv04_texture_formats)/sizeof(nv04_texture_formats[0]); i++) { - if (tf->pipe == pipe_format) - return tf->format; - tf++; - } - - NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); - return 
0; -} - - -static void -nv04_fragtex_build(struct nv04_context *nv04, int unit) -{ - struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; - struct pipe_texture *pt = &nv04mt->base; - - switch (pt->target) { - case PIPE_TEXTURE_2D: - break; - default: - NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; - } - - nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER - | NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER - | nv04_fragtex_format(pt->format) - | ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) - | ( log2i(pt->width0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) - | ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) - | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE - | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE - ; -} - - -void -nv04_fragtex_bind(struct nv04_context *nv04) -{ - nv04_fragtex_build(nv04, 0); -} - diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c deleted file mode 100644 index e0a6948aeb4..00000000000 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ /dev/null @@ -1,146 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "util/u_math.h" - -#include "nv04_context.h" -#include "nv04_screen.h" - -static void -nv04_miptree_layout(struct nv04_miptree *nv04mt) -{ - struct pipe_texture *pt = &nv04mt->base; - uint offset = 0; - int nr_faces, l; - - nr_faces = 1; - - for (l = 0; l <= pt->last_level; l++) { - nv04mt->level[l].pitch = pt->width0; - nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63; - } - - for (l = 0; l <= pt->last_level; l++) { - nv04mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); - /* XXX guess was obviously missing */ - nv04mt->level[l].image_offset[0] = offset; - offset += nv04mt->level[l].pitch * u_minify(pt->height0, l); - } - - nv04mt->total_size = offset; -} - -static struct pipe_texture * 
-nv04_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) -{ - struct nv04_miptree *mt; - - mt = MALLOC(sizeof(struct nv04_miptree)); - if (!mt) - return NULL; - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - - //mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - nv04_miptree_layout(mt); - - mt->buffer = pscreen->buffer_create(pscreen, 256, PIPE_BUFFER_USAGE_PIXEL | - NOUVEAU_BUFFER_USAGE_TEXTURE, - mt->total_size); - if (!mt->buffer) { - printf("failed %d byte alloc\n",mt->total_size); - FREE(mt); - return NULL; - } - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; -} - -static struct pipe_texture * -nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, - const unsigned *stride, struct pipe_buffer *pb) -{ - struct nv04_miptree *mt; - - /* Only supports 2D, non-mipmapped textures for the moment */ - if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth0 != 1) - return NULL; - - mt = CALLOC_STRUCT(nv04_miptree); - if (!mt) - return NULL; - - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - mt->level[0].pitch = stride[0]; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); - - pipe_buffer_reference(&mt->buffer, pb); - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; -} - -static void -nv04_miptree_destroy(struct pipe_texture *pt) -{ - struct nv04_miptree *mt = (struct nv04_miptree *)pt; - int l; - - pipe_buffer_reference(&mt->buffer, NULL); - for (l = 0; l <= pt->last_level; l++) { - if (mt->level[l].image_offset) - FREE(mt->level[l].image_offset); - } - - FREE(mt); -} - -static struct pipe_surface * -nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) -{ - struct nv04_miptree *nv04mt = (struct nv04_miptree *)pt; - struct nv04_surface *ns; - - ns = CALLOC_STRUCT(nv04_surface); - if (!ns) - return 
NULL; - pipe_texture_reference(&ns->base.texture, pt); - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = flags; - pipe_reference_init(&ns->base.reference, 1); - ns->base.face = face; - ns->base.level = level; - ns->base.zslice = zslice; - ns->pitch = nv04mt->level[level].pitch; - - ns->base.offset = nv04mt->level[level].image_offset[0]; - - return &ns->base; -} - -static void -nv04_miptree_surface_del(struct pipe_surface *ps) -{ - pipe_texture_reference(&ps->texture, NULL); - FREE(ps); -} - -void -nv04_screen_init_miptree_functions(struct pipe_screen *pscreen) -{ - pscreen->texture_create = nv04_miptree_create; - pscreen->texture_blanket = nv04_miptree_blanket; - pscreen->texture_destroy = nv04_miptree_destroy; - pscreen->get_tex_surface = nv04_miptree_surface_new; - pscreen->tex_surface_destroy = nv04_miptree_surface_del; -} - diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c deleted file mode 100644 index 0b795ea2430..00000000000 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ /dev/null @@ -1,339 +0,0 @@ - -#include "util/u_debug.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_compiler.h" - -#include "draw/draw_vbuf.h" - -#include "nv04_context.h" -#include "nv04_state.h" - -#define VERTEX_SIZE 40 -#define VERTEX_BUFFER_SIZE (4096*VERTEX_SIZE) // 4096 vertices of 40 bytes each - -/** - * Primitive renderer for nv04. - */ -struct nv04_vbuf_render { - struct vbuf_render base; - - struct nv04_context *nv04; - - /** Vertex buffer */ - unsigned char* buffer; - - /** Vertex size in bytes */ - unsigned vertex_size; - - /** Current primitive */ - unsigned prim; -}; - - -/** - * Basically a cast wrapper. 
- */ -static INLINE struct nv04_vbuf_render * -nv04_vbuf_render( struct vbuf_render *render ) -{ - assert(render); - return (struct nv04_vbuf_render *)render; -} - - -static const struct vertex_info * -nv04_vbuf_render_get_vertex_info( struct vbuf_render *render ) -{ - struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); - struct nv04_context *nv04 = nv04_render->nv04; - return &nv04->vertex_info; -} - - -static boolean -nv04_vbuf_render_allocate_vertices( struct vbuf_render *render, - ushort vertex_size, - ushort nr_vertices ) -{ - struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); - - nv04_render->buffer = (unsigned char*) MALLOC(VERTEX_BUFFER_SIZE); - assert(!nv04_render->buffer); - - return nv04_render->buffer ? TRUE : FALSE; -} - -static void * -nv04_vbuf_render_map_vertices( struct vbuf_render *render ) -{ - struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); - return nv04_render->buffer; -} - -static void -nv04_vbuf_render_unmap_vertices( struct vbuf_render *render, - ushort min_index, - ushort max_index ) -{ -} - -static boolean -nv04_vbuf_render_set_primitive( struct vbuf_render *render, - unsigned prim ) -{ - struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); - - if (prim <= PIPE_PRIM_LINE_STRIP) - return FALSE; - - nv04_render->prim = prim; - return TRUE; -} - -static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) -{ - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA), 49); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v4,8); - OUT_RINGp(chan, 
buffer + VERTEX_SIZE * v5,8); - OUT_RING(chan, 0xFEDCBA); -} - -static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) -{ - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD), 25); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); - OUT_RING(chan, 0xFED); -} - -static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) -{ - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC), 33); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); - OUT_RING(chan, 0xFECEDC); -} - -static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices) -{ - unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; - int i; - - for( i=0; i< nr_indices-5; i+=6) - nv04_2triangles(nv04, - buffer, - indices[i+0], - indices[i+1], - indices[i+2], - indices[i+3], - indices[i+4], - indices[i+5] - ); - if (i != nr_indices) - { - nv04_1triangle(nv04, - buffer, - indices[i+0], - indices[i+1], - indices[i+2] - ); - i+=3; - } - if (i != nr_indices) - NOUVEAU_ERR("Houston, we have lost some vertices\n"); -} - -static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) -{ - const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; - 
unsigned char* buffer = render->buffer; - struct nv04_context *nv04 = render->nv04; - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - int i,j; - - for(i = 0; i<nr_indices; i+=14) - { - int numvert = MIN2(16, nr_indices - i); - int numtri = numvert - 2; - if (numvert<3) - break; - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8); - for(j = 0; j<numvert; j++) - OUT_RINGp(chan, buffer + VERTEX_SIZE * indices [i+j], 8 ); - - BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); - for(j = 0; j<numtri/2; j++ ) - OUT_RING(chan, striptbl[j]); - if (numtri%2) - OUT_RING(chan, striptbl[numtri/2]&0xFFF); - } -} - -static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) -{ - const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; - unsigned char* buffer = render->buffer; - struct nv04_context *nv04 = render->nv04; - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - int i,j; - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); - OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[0], 8); - - for(i = 1; i<nr_indices; i+=14) - { - int numvert=MIN2(15, nr_indices - i); - int numtri=numvert-2; - if (numvert < 3) - break; - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); - - for(j=0;j<numvert;j++) - OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[ i+j ], 8 ); - - BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); - for(j = 0; j<numtri/2; j++) - OUT_RING(chan, fantbl[j]); - if (numtri%2) - OUT_RING(chan, fantbl[numtri/2]&0xFFF); - } -} - -static void nv04_vbuf_render_quads_elts(struct nv04_vbuf_render* render, const 
ushort* indices, uint nr_indices) -{ - unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; - int i; - - for(i = 0; i < nr_indices; i += 4) - nv04_1quad(nv04, - buffer, - indices[i+0], - indices[i+1], - indices[i+2], - indices[i+3] - ); -} - - -static void -nv04_vbuf_render_draw( struct vbuf_render *render, - const ushort *indices, - uint nr_indices) -{ - struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); - - // emit the indices - switch( nv04_render->prim ) - { - case PIPE_PRIM_TRIANGLES: - nv04_vbuf_render_triangles_elts(nv04_render, indices, nr_indices); - break; - case PIPE_PRIM_QUAD_STRIP: - case PIPE_PRIM_TRIANGLE_STRIP: - nv04_vbuf_render_tri_strip_elts(nv04_render, indices, nr_indices); - break; - case PIPE_PRIM_TRIANGLE_FAN: - case PIPE_PRIM_POLYGON: - nv04_vbuf_render_tri_fan_elts(nv04_render, indices, nr_indices); - break; - case PIPE_PRIM_QUADS: - nv04_vbuf_render_quads_elts(nv04_render, indices, nr_indices); - break; - default: - NOUVEAU_ERR("You have to implement primitive %d, young padawan\n", nv04_render->prim); - break; - } -} - - -static void -nv04_vbuf_render_release_vertices( struct vbuf_render *render ) -{ - struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); - - free(nv04_render->buffer); - nv04_render->buffer = NULL; -} - - -static void -nv04_vbuf_render_destroy( struct vbuf_render *render ) -{ - struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); - FREE(nv04_render); -} - - -/** - * Create a new primitive render. 
- */ -static struct vbuf_render * -nv04_vbuf_render_create( struct nv04_context *nv04 ) -{ - struct nv04_vbuf_render *nv04_render = CALLOC_STRUCT(nv04_vbuf_render); - - nv04_render->nv04 = nv04; - - nv04_render->base.max_vertex_buffer_bytes = VERTEX_BUFFER_SIZE; - nv04_render->base.max_indices = 65536; - nv04_render->base.get_vertex_info = nv04_vbuf_render_get_vertex_info; - nv04_render->base.allocate_vertices = nv04_vbuf_render_allocate_vertices; - nv04_render->base.map_vertices = nv04_vbuf_render_map_vertices; - nv04_render->base.unmap_vertices = nv04_vbuf_render_unmap_vertices; - nv04_render->base.set_primitive = nv04_vbuf_render_set_primitive; - nv04_render->base.draw = nv04_vbuf_render_draw; - nv04_render->base.release_vertices = nv04_vbuf_render_release_vertices; - nv04_render->base.destroy = nv04_vbuf_render_destroy; - - return &nv04_render->base; -} - - -/** - * Create a new primitive vbuf/render stage. - */ -struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ) -{ - struct vbuf_render *render; - struct draw_stage *stage; - - render = nv04_vbuf_render_create(nv04); - if(!render) - return NULL; - - stage = draw_vbuf_stage( nv04->draw, render ); - if(!stage) { - render->destroy(render); - return NULL; - } - - return stage; -} diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c deleted file mode 100644 index 7c5b6e8229a..00000000000 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ /dev/null @@ -1,212 +0,0 @@ -#include "pipe/p_screen.h" -#include "pipe/p_inlines.h" - -#include "nv04_context.h" -#include "nv04_screen.h" - -static int -nv04_screen_get_param(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 1; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 0; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case 
PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 0; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 0; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 0; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return 0; - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - return 1; - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 0; - case NOUVEAU_CAP_HW_VTXBUF: - case NOUVEAU_CAP_HW_IDXBUF: - return 0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv04_screen_get_paramf(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 0.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 0.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 0.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 0.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static boolean -nv04_screen_is_format_supported(struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, unsigned geom_flags) -{ - if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - return TRUE; - default: - break; - } - } else - if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - } else { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - return TRUE; - default: - break; - } - } - - return FALSE; -} - -static void -nv04_screen_destroy(struct 
pipe_screen *pscreen) -{ - struct nv04_screen *screen = nv04_screen(pscreen); - - nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->fahrenheit); - nv04_surface_2d_takedown(&screen->eng2d); - - nouveau_screen_fini(&screen->base); - - FREE(pscreen); -} - -static struct pipe_buffer * -nv04_surface_buffer(struct pipe_surface *surf) -{ - struct nv04_miptree *mt = (struct nv04_miptree *)surf->texture; - - return mt->buffer; -} - -struct pipe_screen * -nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) -{ - struct nv04_screen *screen = CALLOC_STRUCT(nv04_screen); - struct nouveau_channel *chan; - struct pipe_screen *pscreen; - unsigned fahrenheit_class = 0, sub3d_class = 0; - int ret; - - if (!screen) - return NULL; - pscreen = &screen->base.base; - - ret = nouveau_screen_init(&screen->base, dev); - if (ret) { - nv04_screen_destroy(pscreen); - return NULL; - } - chan = screen->base.channel; - - pscreen->winsys = ws; - pscreen->destroy = nv04_screen_destroy; - pscreen->get_param = nv04_screen_get_param; - pscreen->get_paramf = nv04_screen_get_paramf; - pscreen->is_format_supported = nv04_screen_is_format_supported; - - nv04_screen_init_miptree_functions(pscreen); - nv04_screen_init_transfer_functions(pscreen); - - if (dev->chipset >= 0x20) { - fahrenheit_class = 0; - sub3d_class = 0; - } else if (dev->chipset >= 0x10) { - fahrenheit_class = NV10_TEXTURED_TRIANGLE; - sub3d_class = NV10_CONTEXT_SURFACES_3D; - } else { - fahrenheit_class=NV04_TEXTURED_TRIANGLE; - sub3d_class = NV04_CONTEXT_SURFACES_3D; - } - - if (!fahrenheit_class) { - NOUVEAU_ERR("Unknown nv04 chipset: nv%02x\n", dev->chipset); - return NULL; - } - - /* 3D object */ - ret = nouveau_grobj_alloc(chan, 0xbeef0001, fahrenheit_class, - &screen->fahrenheit); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return NULL; - } - BIND_RING(chan, screen->fahrenheit, 7); - - /* 3D surface object */ - ret = nouveau_grobj_alloc(chan, 0xbeef0002, sub3d_class, - 
&screen->context_surfaces_3d); - if (ret) { - NOUVEAU_ERR("Error creating 3D surface object: %d\n", ret); - return NULL; - } - BIND_RING(chan, screen->context_surfaces_3d, 6); - - /* 2D engine setup */ - screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nv04_surface_buffer; - - /* Notifier for sync purposes */ - ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv04_screen_destroy(pscreen); - return NULL; - } - - return pscreen; -} - diff --git a/src/gallium/drivers/nv04/nv04_screen.h b/src/gallium/drivers/nv04/nv04_screen.h deleted file mode 100644 index 11466b9442c..00000000000 --- a/src/gallium/drivers/nv04/nv04_screen.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef __NV04_SCREEN_H__ -#define __NV04_SCREEN_H__ - -#include "nouveau/nouveau_screen.h" -#include "nv04_surface_2d.h" - -struct nv04_screen { - struct nouveau_screen base; - - struct nouveau_winsys *nvws; - unsigned chipset; - - /* HW graphics objects */ - struct nv04_surface_2d *eng2d; - struct nouveau_grobj *fahrenheit; - struct nouveau_grobj *context_surfaces_3d; - struct nouveau_notifier *sync; - -}; - -static INLINE struct nv04_screen * -nv04_screen(struct pipe_screen *screen) -{ - return (struct nv04_screen *)screen; -} - -void -nv04_screen_init_transfer_functions(struct pipe_screen *pscreen); - -#endif diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c deleted file mode 100644 index e3dc4c5bf44..00000000000 --- a/src/gallium/drivers/nv04/nv04_state.c +++ /dev/null @@ -1,459 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_inlines.h" - -#include "tgsi/tgsi_parse.h" - -#include "nv04_context.h" -#include "nv04_state.h" - -static void * -nv04_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nv04_blend_state 
*cb; - - cb = MALLOC(sizeof(struct nv04_blend_state)); - - cb->b_enable = cso->blend_enable ? 1 : 0; - cb->b_src = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | - (nvgl_blend_func(cso->rgb_src_factor))); - cb->b_dst = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | - (nvgl_blend_func(cso->rgb_dst_factor))); - - - return (void *)cb; -} - -static void -nv04_blend_state_bind(struct pipe_context *pipe, void *blend) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - nv04->blend = (struct nv04_blend_state*)blend; - - nv04->dirty |= NV04_NEW_BLEND; -} - -static void -nv04_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ - free(hwcso); -} - - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; - } - return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; -} - -static void * -nv04_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - - struct nv04_sampler_state *ss; - uint32_t filter = 0; - - ss = MALLOC(sizeof(struct nv04_sampler_state)); - - ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | - (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); - - if (cso->max_anisotropy > 1.0) { 
- filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; - break; - } - break; - } - - ss->filter = filter; - - return (void *)ss; -} - -static void -nv04_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) -{ - struct nv04_context *nv04 = nv04_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - nv04->sampler[unit] = sampler[unit]; - nv04->dirty_samplers |= (1 << unit); - } -} - -static void -nv04_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - free(hwcso); -} - -static void -nv04_set_sampler_texture(struct pipe_context *pipe, unsigned nr, - struct pipe_texture **miptree) -{ - struct nv04_context *nv04 = nv04_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - nv04->tex_miptree[unit] = (struct nv04_miptree 
*)miptree[unit]; - nv04->dirty_samplers |= (1 << unit); - } -} - -static void * -nv04_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nv04_rasterizer_state *rs; - - /*XXX: ignored: - * scissor - * points/lines (no hw support, emulated with tris in gallium) - */ - rs = MALLOC(sizeof(struct nv04_rasterizer_state)); - - rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; - - return (void *)rs; -} - -static void -nv04_rasterizer_state_bind(struct pipe_context *pipe, void *rast) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - nv04->rast = (struct nv04_rasterizer_state*)rast; - - draw_set_rasterizer_state(nv04->draw, (nv04->rast ? nv04->rast->templ : NULL)); - - nv04->dirty |= NV04_NEW_RAST | NV04_NEW_BLEND; -} - -static void -nv04_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ - free(hwcso); -} - -static INLINE uint32_t nv04_compare_func(uint32_t f) -{ - switch ( f ) { - case PIPE_FUNC_NEVER: return 1; - case PIPE_FUNC_LESS: return 2; - case PIPE_FUNC_EQUAL: return 3; - case PIPE_FUNC_LEQUAL: return 4; - case PIPE_FUNC_GREATER: return 5; - case PIPE_FUNC_NOTEQUAL: return 6; - case PIPE_FUNC_GEQUAL: return 7; - case PIPE_FUNC_ALWAYS: return 8; - } - NOUVEAU_MSG("Unable to find the function\n"); - return 0; -} - -static void * -nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nv04_depth_stencil_alpha_state *hw; - - hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); - - hw->control = float_to_ubyte(cso->alpha.ref_value); - hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); - hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0; - hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN; - hw->control |= cso->depth.enabled ? 
NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0; - hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); - hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module - hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; - hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; - hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0; - hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format - - return (void *)hw; -} - -static void -nv04_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - nv04->dsa = hwcso; - nv04->dirty |= NV04_NEW_CONTROL; -} - -static void -nv04_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ - free(hwcso); -} - -static void * -nv04_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - return draw_create_vertex_shader(nv04->draw, templ); -} - -static void -nv04_vp_state_bind(struct pipe_context *pipe, void *shader) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - draw_bind_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); - - nv04->dirty |= NV04_NEW_VERTPROG; -} - -static void -nv04_vp_state_delete(struct pipe_context *pipe, void *shader) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - draw_delete_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); -} - -static void * -nv04_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv04_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nv04_fragment_program)); - fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - - return (void *)fp; -} - -static void -nv04_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv04_context *nv04 = 
nv04_context(pipe); - struct nv04_fragment_program *fp = hwcso; - - nv04->fragprog.current = fp; - nv04->dirty |= NV04_NEW_FRAGPROG; -} - -static void -nv04_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv04_context *nv04 = nv04_context(pipe); - struct nv04_fragment_program *fp = hwcso; - - nv04_fragprog_destroy(nv04, fp); - free((void*)fp->pipe.tokens); - free(fp); -} - -static void -nv04_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ -} - -static void -nv04_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ -} - -static void -nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) -{ - struct nv04_context *nv04 = nv04_context(pipe); - struct pipe_screen *pscreen = pipe->screen; - - assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); - - if (buf) { - void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) - { - memcpy(nv04->constbuf[shader], mapped, buf->buffer->size); - nv04->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); - } - } -} - -static void -nv04_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - nv04->framebuffer = (struct pipe_framebuffer_state*)fb; - - nv04->dirty |= NV04_NEW_FRAMEBUFFER; -} -static void -nv04_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - NOUVEAU_ERR("line stipple hahaha\n"); -} - -static void -nv04_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ -/* struct nv04_context *nv04 = nv04_context(pipe); - - // XXX - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) 
<< 16) | s->miny);*/ -} - -static void -nv04_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *viewport) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - nv04->viewport = *viewport; - - draw_set_viewport_state(nv04->draw, &nv04->viewport); -} - -static void -nv04_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - memcpy(nv04->vtxbuf, buffers, count * sizeof(buffers[0])); - nv04->dirty |= NV04_NEW_VTXARRAYS; - - draw_set_vertex_buffers(nv04->draw, count, buffers); -} - -static void -nv04_set_vertex_elements(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_element *elements) -{ - struct nv04_context *nv04 = nv04_context(pipe); - - memcpy(nv04->vtxelt, elements, sizeof(*elements) * count); - nv04->dirty |= NV04_NEW_VTXARRAYS; - - draw_set_vertex_elements(nv04->draw, count, elements); -} - -void -nv04_init_state_functions(struct nv04_context *nv04) -{ - nv04->pipe.create_blend_state = nv04_blend_state_create; - nv04->pipe.bind_blend_state = nv04_blend_state_bind; - nv04->pipe.delete_blend_state = nv04_blend_state_delete; - - nv04->pipe.create_sampler_state = nv04_sampler_state_create; - nv04->pipe.bind_fragment_sampler_states = nv04_sampler_state_bind; - nv04->pipe.delete_sampler_state = nv04_sampler_state_delete; - nv04->pipe.set_fragment_sampler_textures = nv04_set_sampler_texture; - - nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create; - nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind; - nv04->pipe.delete_rasterizer_state = nv04_rasterizer_state_delete; - - nv04->pipe.create_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_create; - nv04->pipe.bind_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_bind; - nv04->pipe.delete_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_delete; - - nv04->pipe.create_vs_state = nv04_vp_state_create; - 
nv04->pipe.bind_vs_state = nv04_vp_state_bind; - nv04->pipe.delete_vs_state = nv04_vp_state_delete; - - nv04->pipe.create_fs_state = nv04_fp_state_create; - nv04->pipe.bind_fs_state = nv04_fp_state_bind; - nv04->pipe.delete_fs_state = nv04_fp_state_delete; - - nv04->pipe.set_blend_color = nv04_set_blend_color; - nv04->pipe.set_clip_state = nv04_set_clip_state; - nv04->pipe.set_constant_buffer = nv04_set_constant_buffer; - nv04->pipe.set_framebuffer_state = nv04_set_framebuffer_state; - nv04->pipe.set_polygon_stipple = nv04_set_polygon_stipple; - nv04->pipe.set_scissor_state = nv04_set_scissor_state; - nv04->pipe.set_viewport_state = nv04_set_viewport_state; - - nv04->pipe.set_vertex_buffers = nv04_set_vertex_buffers; - nv04->pipe.set_vertex_elements = nv04_set_vertex_elements; -} - diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h deleted file mode 100644 index 81d1d2ebaa9..00000000000 --- a/src/gallium/drivers/nv04/nv04_state.h +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef __NV04_STATE_H__ -#define __NV04_STATE_H__ - -#include "pipe/p_state.h" -#include "tgsi/tgsi_scan.h" - -struct nv04_blend_state { - uint32_t b_enable; - uint32_t b_src; - uint32_t b_dst; -}; - -struct nv04_fragtex_state { - uint32_t format; -}; - -struct nv04_sampler_state { - uint32_t filter; - uint32_t format; -}; - -struct nv04_depth_stencil_alpha_state { - uint32_t control; -}; - -struct nv04_rasterizer_state { - uint32_t blend; - - const struct pipe_rasterizer_state *templ; -}; - -struct nv04_miptree { - struct pipe_texture base; - struct nouveau_bo *bo; - - struct pipe_buffer *buffer; - uint total_size; - - struct { - uint pitch; - uint *image_offset; - } level[PIPE_MAX_TEXTURE_LEVELS]; -}; - -struct nv04_fragment_program_data { - unsigned offset; - unsigned index; -}; - -struct nv04_fragment_program { - struct pipe_shader_state pipe; - struct tgsi_shader_info info; - - boolean translated; - boolean on_hw; - unsigned samplers; - - uint32_t *insn; - int 
insn_len; - - struct nv04_fragment_program_data *consts; - unsigned nr_consts; - - struct pipe_buffer *buffer; - - uint32_t fp_control; - uint32_t fp_reg_control; -}; - - - -#endif diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c deleted file mode 100644 index b8d6dc560f0..00000000000 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ /dev/null @@ -1,246 +0,0 @@ -#include "nv04_context.h" -#include "nv04_state.h" - -static void nv04_vertex_layout(struct pipe_context* pipe) -{ - struct nv04_context *nv04 = nv04_context(pipe); - struct nv04_fragment_program *fp = nv04->fragprog.current; - uint32_t src = 0; - int i; - struct vertex_info vinfo; - - memset(&vinfo, 0, sizeof(vinfo)); - - for (i = 0; i < fp->info.num_inputs; i++) { - int isn = fp->info.input_semantic_name[i]; - int isi = fp->info.input_semantic_index[i]; - switch (isn) { - case TGSI_SEMANTIC_POSITION: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); - break; - case TGSI_SEMANTIC_COLOR: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); - break; - default: - case TGSI_SEMANTIC_GENERIC: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); - break; - case TGSI_SEMANTIC_FOG: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); - break; - } - } - - printf("%d vertex input\n",fp->info.num_inputs); - draw_compute_vertex_size(&vinfo); -} - -static uint32_t nv04_blend_func(uint32_t f) -{ - switch ( f ) { - case PIPE_BLENDFACTOR_ZERO: return 0x1; - case PIPE_BLENDFACTOR_ONE: return 0x2; - case PIPE_BLENDFACTOR_SRC_COLOR: return 0x3; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: return 0x4; - case PIPE_BLENDFACTOR_SRC_ALPHA: return 0x5; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return 0x6; - case PIPE_BLENDFACTOR_DST_ALPHA: return 0x7; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: return 0x8; - case PIPE_BLENDFACTOR_DST_COLOR: return 0x9; - case PIPE_BLENDFACTOR_INV_DST_COLOR: return 0xA; - case 
PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return 0xB; - } - NOUVEAU_MSG("Unable to find the blend function 0x%x\n",f); - return 0; -} - -static void nv04_emit_control(struct nv04_context* nv04) -{ - uint32_t control = nv04->dsa->control; - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(chan, control); -} - -static void nv04_emit_blend(struct nv04_context* nv04) -{ - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - uint32_t blend; - - blend=0x4; // texture MODULATE_ALPHA - blend|=0x20; // alpha is MSB - blend|=(2<<6); // flat shading - blend|=(1<<8); // persp correct - blend|=(0<<16); // no fog - blend|=(nv04->blend->b_enable<<20); - blend|=(nv04_blend_func(nv04->blend->b_src)<<24); - blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); - OUT_RING(chan, blend); -} - -static void nv04_emit_sampler(struct nv04_context *nv04, int unit) -{ - struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; - struct pipe_texture *pt = &nv04mt->base; - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); - OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING(chan, nv04->sampler[unit]->filter); -} - -static void nv04_state_emit_framebuffer(struct nv04_context* nv04) -{ - struct pipe_framebuffer_state* fb = 
nv04->framebuffer; - struct nv04_surface *rt, *zeta; - uint32_t rt_format, w, h; - int colour_format = 0, zeta_format = 0; - struct nv04_miptree *nv04mt = 0; - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; - struct nouveau_bo *bo; - - w = fb->cbufs[0]->width; - h = fb->cbufs[0]->height; - colour_format = fb->cbufs[0]->format; - rt = (struct nv04_surface *)fb->cbufs[0]; - - if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - } - - zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; - } - - switch (colour_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format = 0x108; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format = 0x103; - break; - default: - assert(0); - } - - BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); - OUT_RING(chan, rt_format); - - nv04mt = (struct nv04_miptree *)rt->base.texture; - bo = nouveau_bo(nv04mt->buffer); - /* FIXME pitches have to be aligned ! 
*/ - BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(chan, rt->pitch|(zeta->pitch<<16)); - OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - if (fb->zsbuf) { - nv04mt = (struct nv04_miptree *)zeta->base.texture; - BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - } -} - -void -nv04_emit_hw_state(struct nv04_context *nv04) -{ - struct nv04_screen *screen = nv04->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *fahrenheit = screen->fahrenheit; - struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; - int i; - - if (nv04->dirty & NV04_NEW_VERTPROG) { - //nv04_vertprog_bind(nv04, nv04->vertprog.current); - nv04->dirty &= ~NV04_NEW_VERTPROG; - } - - if (nv04->dirty & NV04_NEW_FRAGPROG) { - nv04_fragprog_bind(nv04, nv04->fragprog.current); - nv04->dirty &= ~NV04_NEW_FRAGPROG; - nv04->dirty_samplers |= (1<<10); - nv04->dirty_samplers = 0; - } - - if (nv04->dirty & NV04_NEW_CONTROL) { - nv04->dirty &= ~NV04_NEW_CONTROL; - - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(chan, nv04->dsa->control); - } - - if (nv04->dirty & NV04_NEW_BLEND) { - nv04->dirty &= ~NV04_NEW_BLEND; - - nv04_emit_blend(nv04); - } - - if (nv04->dirty & NV04_NEW_VTXARRAYS) { - nv04->dirty &= ~NV04_NEW_VTXARRAYS; - nv04_vertex_layout(nv04); - } - - if (nv04->dirty & NV04_NEW_SAMPLER) { - nv04->dirty &= ~NV04_NEW_SAMPLER; - - nv04_emit_sampler(nv04, 0); - } - - if (nv04->dirty & NV04_NEW_VIEWPORT) { - nv04->dirty &= ~NV04_NEW_VIEWPORT; -// nv04_state_emit_viewport(nv04); - } - - if (nv04->dirty & NV04_NEW_FRAMEBUFFER) { - nv04->dirty &= ~NV04_NEW_FRAMEBUFFER; - nv04_state_emit_framebuffer(nv04); - } - - /* Emit relocs for every referenced buffer. - * This is to ensure the bufmgr has an accurate idea of how - * the buffer is used. 
This isn't very efficient, but we don't - * seem to take a significant performance hit. Will be improved - * at some point. Vertex arrays are emitted by nv04_vbo.c - */ - - /* Render target */ - unsigned rt_pitch = ((struct nv04_surface *)nv04->rt)->pitch; - unsigned zeta_pitch = ((struct nv04_surface *)nv04->zeta)->pitch; - - BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(chan, rt_pitch|(zeta_pitch<<16)); - OUT_RELOCl(chan, nouveau_bo(nv04->rt), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - if (nv04->zeta) { - BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(chan, nouveau_bo(nv04->zeta), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - } - - /* Texture images */ - for (i = 0; i < 1; i++) { - if (!(nv04->fp_samplers & (1 << i))) - continue; - struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; - struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); - BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); - OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - } -} - diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c deleted file mode 100644 index 0387ff4e78b..00000000000 --- a/src/gallium/drivers/nv04/nv04_surface.c +++ /dev/null @@ -1,63 +0,0 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "nv04_context.h" -#include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" -#include "util/u_tile.h" - -static void -nv04_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, - struct pipe_surface *src, unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - struct nv04_context *nv04 = nv04_context(pipe); - struct nv04_surface_2d *eng2d = nv04->screen->eng2d; - - eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); -} - -static void -nv04_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nv04_context *nv04 = nv04_context(pipe); - struct nv04_surface_2d *eng2d = nv04->screen->eng2d; - - eng2d->fill(eng2d, dest, destx, desty, width, height, value); -} - -void -nv04_init_surface_functions(struct nv04_context *nv04) -{ - nv04->pipe.surface_copy = nv04_surface_copy; - nv04->pipe.surface_fill = nv04_surface_fill; -} diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c deleted file mode 100644 index 2dd2e146a8f..00000000000 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ /dev/null @@ -1,178 +0,0 @@ -#include <pipe/p_state.h> -#include <pipe/p_defines.h> -#include <pipe/p_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_math.h> -#include <nouveau/nouveau_winsys.h> -#include "nv04_context.h" -#include "nv04_screen.h" -#include "nv04_state.h" - -struct nv04_transfer { - struct pipe_transfer base; - struct pipe_surface *surface; - boolean direct; -}; - -static void -nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, - struct pipe_texture *template) -{ - memset(template, 0, sizeof(struct pipe_texture)); - template->target = 
pt->target; - template->format = pt->format; - template->width0 = width; - template->height0 = height; - template->depth0 = 1; - template->last_level = 0; - template->nr_samples = pt->nr_samples; - - template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | - NOUVEAU_TEXTURE_USAGE_LINEAR; -} - -static struct pipe_transfer * -nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct nv04_miptree *mt = (struct nv04_miptree *)pt; - struct nv04_transfer *tx; - struct pipe_texture tx_tex_template, *tx_tex; - - tx = CALLOC_STRUCT(nv04_transfer); - if (!tx) - return NULL; - - pipe_texture_reference(&tx->base.texture, pt); - tx->base.x = x; - tx->base.y = y; - tx->base.width = w; - tx->base.height = h; - tx->base.stride = mt->level[level].pitch; - tx->base.usage = usage; - tx->base.face = face; - tx->base.level = level; - tx->base.zslice = zslice; - - /* Direct access to texture */ - if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || - debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) && - pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) - { - tx->direct = true; - tx->surface = pscreen->get_tex_surface(pscreen, pt, - 0, 0, 0, - pipe_transfer_buffer_flags(&tx->base)); - return &tx->base; - } - - tx->direct = false; - - nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template); - - tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); - if (!tx_tex) - { - FREE(tx); - return NULL; - } - - tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch; - - tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, - face, level, zslice, - pipe_transfer_buffer_flags(&tx->base)); - - pipe_texture_reference(&tx_tex, NULL); - - if (!tx->surface) - { - pipe_surface_reference(&tx->surface, NULL); - FREE(tx); - return NULL; - } - - if (usage & PIPE_TRANSFER_READ) { - struct nv04_screen *nvscreen = 
nv04_screen(pscreen); - struct pipe_surface *src; - - src = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - PIPE_BUFFER_USAGE_GPU_READ); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - /* TODO: Check if SIFM can un-swizzle */ - nvscreen->eng2d->copy(nvscreen->eng2d, - tx->surface, 0, 0, - src, x, y, - w, h); - - pipe_surface_reference(&src, NULL); - } - - return &tx->base; -} - -static void -nv04_transfer_del(struct pipe_transfer *ptx) -{ - struct nv04_transfer *tx = (struct nv04_transfer *)ptx; - - if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = ptx->texture->screen; - struct nv04_screen *nvscreen = nv04_screen(pscreen); - struct pipe_surface *dst; - - dst = pscreen->get_tex_surface(pscreen, ptx->texture, - ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - nvscreen->eng2d->copy(nvscreen->eng2d, - dst, tx->base.x, tx->base.y, - tx->surface, 0, 0, - tx->base.width, tx->base.height); - - pipe_surface_reference(&dst, NULL); - } - - pipe_surface_reference(&tx->surface, NULL); - pipe_texture_reference(&ptx->texture, NULL); - FREE(ptx); -} - -static void * -nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv04_transfer *tx = (struct nv04_transfer *)ptx; - struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nv04_miptree *mt = (struct nv04_miptree *)tx->surface->texture; - void *map = pipe_buffer_map(pscreen, mt->buffer, - pipe_transfer_buffer_flags(ptx)); - - if(!tx->direct) - return map + ns->base.offset; - else - return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); -} - -static void -nv04_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv04_transfer *tx = (struct nv04_transfer *)ptx; - struct nv04_miptree *mt = (struct nv04_miptree *)tx->surface->texture; - - 
pipe_buffer_unmap(pscreen, mt->buffer); -} - -void -nv04_screen_init_transfer_functions(struct pipe_screen *pscreen) -{ - pscreen->get_tex_transfer = nv04_transfer_new; - pscreen->tex_transfer_destroy = nv04_transfer_del; - pscreen->transfer_map = nv04_transfer_map; - pscreen->transfer_unmap = nv04_transfer_unmap; -} diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c deleted file mode 100644 index 34847718145..00000000000 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ /dev/null @@ -1,78 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "pipe/p_inlines.h" - -#include "nv04_context.h" -#include "nv04_state.h" - -#include "nouveau/nouveau_channel.h" -#include "nouveau/nouveau_pushbuf.h" - -void nv04_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) -{ - struct pipe_screen *pscreen = pipe->screen; - struct nv04_context *nv04 = nv04_context( pipe ); - struct draw_context *draw = nv04->draw; - unsigned i; - - nv04_emit_hw_state(nv04); - - /* - * Map vertex buffers - */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (nv04->vtxbuf[i].buffer) { - void *buf - = pipe_buffer_map(pscreen, - nv04->vtxbuf[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(draw, i, buf); - } - } - /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pscreen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, NULL); - } - - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, - nv04->constbuf[PIPE_SHADER_VERTEX], - nv04->constbuf_nr[PIPE_SHADER_VERTEX]); - - /* draw! 
*/ - draw_arrays(nv04->draw, prim, start, count); - - /* - * unmap vertex/index buffers - */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (nv04->vtxbuf[i].buffer) { - pipe_buffer_unmap(pscreen, nv04->vtxbuf[i].buffer); - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - } - if (indexBuffer) { - pipe_buffer_unmap(pscreen, indexBuffer); - draw_set_mapped_element_buffer(draw, 0, NULL); - } -} - -void nv04_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) -{ - printf("coucou in draw arrays\n"); - nv04_draw_elements(pipe, NULL, 0, prim, start, count); -} - - - diff --git a/src/gallium/drivers/nv10/Makefile b/src/gallium/drivers/nv10/Makefile deleted file mode 100644 index 62677f5194a..00000000000 --- a/src/gallium/drivers/nv10/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv10 - -C_SOURCES = \ - nv10_clear.c \ - nv10_context.c \ - nv10_fragprog.c \ - nv10_fragtex.c \ - nv10_miptree.c \ - nv10_prim_vbuf.c \ - nv10_screen.c \ - nv10_state.c \ - nv10_state_emit.c \ - nv10_surface.c \ - nv10_transfer.c \ - nv10_vbo.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/nv10/nv10_clear.c b/src/gallium/drivers/nv10/nv10_clear.c deleted file mode 100644 index a39a2b5f525..00000000000 --- a/src/gallium/drivers/nv10/nv10_clear.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_clear.h" - -#include "nv10_context.h" - -void -nv10_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - util_clear(pipe, nv10_context(pipe)->framebuffer, buffers, rgba, depth, - stencil); -} diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c deleted file mode 100644 index 1ecb73d06e8..00000000000 --- a/src/gallium/drivers/nv10/nv10_context.c +++ /dev/null @@ -1,298 +0,0 @@ -#include "draw/draw_context.h" 
-#include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" - -#include "nv10_context.h" -#include "nv10_screen.h" - -static void -nv10_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct nv10_context *nv10 = nv10_context(pipe); - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - - draw_flush(nv10->draw); - - FIRE_RING(chan); - if (fence) - *fence = NULL; -} - -static void -nv10_destroy(struct pipe_context *pipe) -{ - struct nv10_context *nv10 = nv10_context(pipe); - - if (nv10->draw) - draw_destroy(nv10->draw); - - FREE(nv10); -} - -static void nv10_init_hwctx(struct nv10_context *nv10) -{ - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - int i; - float projectionmatrix[16]; - - BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1); - OUT_RING (chan, screen->sync->handle); - BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 2); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->gart->handle); - BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->vram->handle); - - BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); - OUT_RING (chan, 0); - - BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 2); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - - BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING (chan, (0x7ff<<16)|0x800); - BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING (chan, (0x7ff<<16)|0x800); - - for (i=1;i<8;i++) { - BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (chan, 0); - } - - BEGIN_RING(chan, celsius, 0x290, 1); - OUT_RING (chan, (0x10<<16)|1); - BEGIN_RING(chan, celsius, 0x3f4, 1); - OUT_RING (chan, 0); - - BEGIN_RING(chan, celsius, 
NV10TCL_NOP, 1); - OUT_RING (chan, 0); - - if (nv10->screen->celsius->grclass != NV10TCL) { - /* For nv11, nv17 */ - BEGIN_RING(chan, celsius, 0x120, 3); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 2); - - BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); - OUT_RING (chan, 0); - } - - BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); - OUT_RING (chan, 0); - - /* Set state */ - BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (chan, 0x207); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - - BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(0), 12); - OUT_RING (chan, 0x30141010); - OUT_RING (chan, 0); - OUT_RING (chan, 0x20040000); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0x00000c00); - OUT_RING (chan, 0); - OUT_RING (chan, 0x00000c00); - OUT_RING (chan, 0x18000000); - OUT_RING (chan, 0x300e0300); - OUT_RING (chan, 0x0c091c80); - - BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0x8006); - BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8); - OUT_RING (chan, 0xff); - OUT_RING (chan, 0x207); - OUT_RING (chan, 0); - OUT_RING (chan, 0xff); - OUT_RING (chan, 0x1e00); - OUT_RING (chan, 0x1e00); - OUT_RING (chan, 0x1e00); - OUT_RING (chan, 0x1d01); - BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1); - OUT_RING (chan, 0); - 
BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_COLOR_CONTROL, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (chan, 0x201); - BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (chan, 8); - BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1); - OUT_RING (chan, 8); - BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (chan, 0x1b02); - OUT_RING (chan, 0x1b02); - BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (chan, 0x405); - OUT_RING (chan, 0x901); - BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_S(0), 8); - for (i=0;i<8;i++) { - OUT_RING (chan, 0); - } - BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RING (chan, 0x3fc00000); /* -1.50 */ - OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */ - OUT_RING (chan, 0); /* 0.00 */ - - BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); - OUT_RING (chan, 0); - - BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2); - OUT_RING (chan, 0x802); - OUT_RING (chan, 2); - /* for some 
reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when - * using texturing, except when using the texture matrix - */ - BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); - OUT_RING (chan, 6); - BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (chan, 0x01010101); - - /* Set vertex component */ - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4); - OUT_RINGf (chan, 1.0); - OUT_RINGf (chan, 1.0); - OUT_RINGf (chan, 1.0); - OUT_RINGf (chan, 1.0); - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RINGf (chan, 1.0); - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 1.0); - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX1_4F_S, 4); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 1.0); - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1); - OUT_RINGf (chan, 0.0); - BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (chan, 1); - - memset(projectionmatrix, 0, sizeof(projectionmatrix)); - BEGIN_RING(chan, celsius, NV10TCL_PROJECTION_MATRIX(0), 16); - projectionmatrix[0*4+0] = 1.0; - projectionmatrix[1*4+1] = 1.0; - projectionmatrix[2*4+2] = 1.0; - projectionmatrix[3*4+3] = 1.0; - for (i=0;i<16;i++) { - OUT_RINGf (chan, projectionmatrix[i]); - } - - BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); - OUT_RING (chan, 0.0); - OUT_RINGf (chan, 16777216.0); - - BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (chan, -2048.0); - OUT_RINGf (chan, -2048.0); - OUT_RINGf (chan, 16777215.0 * 0.5); - OUT_RING (chan, 0); - - FIRE_RING (chan); -} - -struct pipe_context * -nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) -{ - struct nv10_screen *screen = nv10_screen(pscreen); - struct 
pipe_winsys *ws = pscreen->winsys; - struct nv10_context *nv10; - struct nouveau_winsys *nvws = screen->nvws; - - nv10 = CALLOC(1, sizeof(struct nv10_context)); - if (!nv10) - return NULL; - nv10->screen = screen; - nv10->pctx_id = pctx_id; - - nv10->nvws = nvws; - - nv10->pipe.winsys = ws; - nv10->pipe.screen = pscreen; - nv10->pipe.destroy = nv10_destroy; - nv10->pipe.draw_arrays = nv10_draw_arrays; - nv10->pipe.draw_elements = nv10_draw_elements; - nv10->pipe.clear = nv10_clear; - nv10->pipe.flush = nv10_flush; - - nv10->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nv10->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - - nv10_init_surface_functions(nv10); - nv10_init_state_functions(nv10); - - nv10->draw = draw_create(); - assert(nv10->draw); - draw_set_rasterize_stage(nv10->draw, nv10_draw_vbuf_stage(nv10)); - - nv10_init_hwctx(nv10); - - return &nv10->pipe; -} - diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h deleted file mode 100644 index ab4b825487d..00000000000 --- a/src/gallium/drivers/nv10/nv10_context.h +++ /dev/null @@ -1,151 +0,0 @@ -#ifndef __NV10_CONTEXT_H__ -#define __NV10_CONTEXT_H__ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_compiler.h" - -#include "util/u_memory.h" -#include "util/u_math.h" - -#include "draw/draw_vertex.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_gldefs.h" -#include "nouveau/nouveau_context.h" - -#include "nv10_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) 
\ - fprintf(stderr, "nouveau: "fmt, ##args); - -#define NV10_NEW_VERTPROG (1 << 0) -#define NV10_NEW_FRAGPROG (1 << 1) -#define NV10_NEW_VTXARRAYS (1 << 2) -#define NV10_NEW_BLEND (1 << 3) -#define NV10_NEW_BLENDCOL (1 << 4) -#define NV10_NEW_RAST (1 << 5) -#define NV10_NEW_DSA (1 << 6) -#define NV10_NEW_VIEWPORT (1 << 7) -#define NV10_NEW_SCISSOR (1 << 8) -#define NV10_NEW_FRAMEBUFFER (1 << 9) - -#include "nv10_screen.h" - -struct nv10_context { - struct pipe_context pipe; - - struct nouveau_winsys *nvws; - struct nv10_screen *screen; - unsigned pctx_id; - - struct draw_context *draw; - - uint32_t dirty; - - struct nv10_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; - struct nv10_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned dirty_samplers; - unsigned fp_samplers; - unsigned vp_samplers; - - uint32_t rt_enable; - struct pipe_buffer *rt[4]; - struct pipe_buffer *zeta; - uint32_t lma_offset; - - struct nv10_blend_state *blend; - struct pipe_blend_color *blend_color; - struct nv10_rasterizer_state *rast; - struct nv10_depth_stencil_alpha_state *dsa; - struct pipe_viewport_state *viewport; - struct pipe_scissor_state *scissor; - struct pipe_framebuffer_state *framebuffer; - - //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; - float *constbuf[PIPE_SHADER_TYPES][32][4]; - unsigned constbuf_nr[PIPE_SHADER_TYPES]; - - struct vertex_info vertex_info; - - struct { - struct pipe_buffer *buffer; - uint32_t format; - } tex[2]; - - unsigned vb_enable; - struct { - struct pipe_buffer *buffer; - unsigned delta; - } vb[16]; - -/* struct { - - struct nouveau_resource *exec_heap; - struct nouveau_resource *data_heap; - - struct nv10_vertex_program *active; - - struct nv10_vertex_program *current; - } vertprog; -*/ - struct { - struct nv10_fragment_program *active; - - struct nv10_fragment_program *current; - struct pipe_buffer *constant_buf; - } fragprog; - - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; -}; - 
-static INLINE struct nv10_context * -nv10_context(struct pipe_context *pipe) -{ - return (struct nv10_context *)pipe; -} - -extern void nv10_init_state_functions(struct nv10_context *nv10); -extern void nv10_init_surface_functions(struct nv10_context *nv10); - -extern void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen); - -/* nv10_clear.c */ -extern void nv10_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil); - - -/* nv10_draw.c */ -extern struct draw_stage *nv10_draw_render_stage(struct nv10_context *nv10); - -/* nv10_fragprog.c */ -extern void nv10_fragprog_bind(struct nv10_context *, - struct nv10_fragment_program *); -extern void nv10_fragprog_destroy(struct nv10_context *, - struct nv10_fragment_program *); - -/* nv10_fragtex.c */ -extern void nv10_fragtex_bind(struct nv10_context *); - -/* nv10_prim_vbuf.c */ -struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ); -extern void nv10_vtxbuf_bind(struct nv10_context* nv10); - -/* nv10_state.c and friends */ -extern void nv10_emit_hw_state(struct nv10_context *nv10); -extern void nv10_state_tex_update(struct nv10_context *nv10); - -/* nv10_vbo.c */ -extern void nv10_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv10_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count); - - -#endif diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c deleted file mode 100644 index 698db5a16a9..00000000000 --- a/src/gallium/drivers/nv10/nv10_fragprog.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "nv10_context.h" - -void -nv10_fragprog_bind(struct nv10_context *nv10, struct 
nv10_fragment_program *fp) -{ -} - -void -nv10_fragprog_destroy(struct nv10_context *nv10, - struct nv10_fragment_program *fp) -{ -} - diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c deleted file mode 100644 index c1f7ccb9ab6..00000000000 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ /dev/null @@ -1,130 +0,0 @@ -#include "nv10_context.h" -#include "nouveau/nouveau_util.h" - -#define _(m,tf) \ -{ \ - TRUE, \ - PIPE_FORMAT_##m, \ - NV10TCL_TX_FORMAT_FORMAT_##tf, \ -} - -struct nv10_texture_format { - boolean defined; - uint pipe; - int format; -}; - -static struct nv10_texture_format -nv10_texture_formats[] = { - _(A8R8G8B8_UNORM, A8R8G8B8), - _(A1R5G5B5_UNORM, A1R5G5B5), - _(A4R4G4B4_UNORM, A4R4G4B4), - _(L8_UNORM , L8 ), - _(A8_UNORM , A8 ), - _(A8L8_UNORM , A8L8 ), -// _(RGB_DXT1 , DXT1, ), -// _(RGBA_DXT1 , DXT1, ), -// _(RGBA_DXT3 , DXT3, ), -// _(RGBA_DXT5 , DXT5, ), - {}, -}; - -static struct nv10_texture_format * -nv10_fragtex_format(uint pipe_format) -{ - struct nv10_texture_format *tf = nv10_texture_formats; - - while (tf->defined) { - if (tf->pipe == pipe_format) - return tf; - tf++; - } - - return NULL; -} - - -static void -nv10_fragtex_build(struct nv10_context *nv10, int unit) -{ -#if 0 - struct nv10_sampler_state *ps = nv10->tex_sampler[unit]; - struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; - struct pipe_texture *pt = &nv10mt->base; - struct nv10_texture_format *tf; - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - uint32_t txf, txs, txp; - - tf = nv10_fragtex_format(pt->format); - if (!tf || !tf->defined) { - NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); - return; - } - - txf = tf->format << 8; - txf |= (pt->last_level + 1) << 16; - txf |= log2i(pt->width0) << 20; - txf |= log2i(pt->height0) << 24; - txf |= log2i(pt->depth0) << 28; - txf |= 8; - - switch (pt->target) { - 
case PIPE_TEXTURE_CUBE: - txf |= NV10TCL_TX_FORMAT_CUBE_MAP; - /* fall-through */ - case PIPE_TEXTURE_2D: - txf |= (2<<4); - break; - case PIPE_TEXTURE_1D: - txf |= (1<<4); - break; - default: - NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; - } - - BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(chan, nouveau_bo(nv10mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(chan, nouveau_bo(nv10mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (chan, ps->wrap); - OUT_RING (chan, 0x40000000); /* enable */ - OUT_RING (chan, txs); - OUT_RING (chan, ps->filt | 0x2000 /* magic */); - OUT_RING (chan, (pt->width0 << 16) | pt->height0); - OUT_RING (chan, ps->bcol); -#endif -} - -void -nv10_fragtex_bind(struct nv10_context *nv10) -{ -#if 0 - struct nv10_fragment_program *fp = nv10->fragprog.active; - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - unsigned samplers, unit; - - samplers = nv10->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (chan, 0); - } - - samplers = nv10->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - nv10_fragtex_build(nv10, unit); - } - - nv10->fp_samplers = fp->samplers; -#endif -} - diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c deleted file mode 100644 index 908482ad854..00000000000 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ /dev/null @@ -1,165 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "util/u_format.h" -#include "util/u_math.h" - -#include "nv10_context.h" -#include "nv10_screen.h" - -static void -nv10_miptree_layout(struct 
nv10_miptree *nv10mt) -{ - struct pipe_texture *pt = &nv10mt->base; - boolean swizzled = FALSE; - uint width = pt->width0; - uint offset = 0; - int nr_faces, l, f; - - if (pt->target == PIPE_TEXTURE_CUBE) { - nr_faces = 6; - } else { - nr_faces = 1; - } - - for (l = 0; l <= pt->last_level; l++) { - if (swizzled) - nv10mt->level[l].pitch = util_format_get_stride(pt->format, width); - else - nv10mt->level[l].pitch = util_format_get_stride(pt->format, pt->width0); - nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63; - - nv10mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); - - width = u_minify(width, 1); - - } - - for (f = 0; f < nr_faces; f++) { - for (l = 0; l <= pt->last_level; l++) { - nv10mt->level[l].image_offset[f] = offset; - offset += nv10mt->level[l].pitch * u_minify(pt->height0, l); - } - } - - nv10mt->total_size = offset; -} - -static struct pipe_texture * -nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, - const unsigned *stride, struct pipe_buffer *pb) -{ - struct nv10_miptree *mt; - - /* Only supports 2D, non-mipmapped textures for the moment */ - if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth0 != 1) - return NULL; - - mt = CALLOC_STRUCT(nv10_miptree); - if (!mt) - return NULL; - - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - mt->level[0].pitch = stride[0]; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); - - pipe_buffer_reference(&mt->buffer, pb); - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; -} - -static struct pipe_texture * -nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) -{ - struct nv10_miptree *mt; - - mt = MALLOC(sizeof(struct nv10_miptree)); - if (!mt) - return NULL; - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = screen; - - nv10_miptree_layout(mt); - - mt->buffer = screen->buffer_create(screen, 256, PIPE_BUFFER_USAGE_PIXEL, - 
mt->total_size); - if (!mt->buffer) { - FREE(mt); - return NULL; - } - mt->bo = nouveau_bo(mt->buffer); - - return &mt->base; -} - -static void -nv10_miptree_destroy(struct pipe_texture *pt) -{ - struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt; - int l; - - pipe_buffer_reference(&nv10mt->buffer, NULL); - for (l = 0; l <= pt->last_level; l++) { - if (nv10mt->level[l].image_offset) - FREE(nv10mt->level[l].image_offset); - } - FREE(nv10mt); -} - -static void -nv10_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, - uint face, uint levels) -{ -} - - -static struct pipe_surface * -nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) -{ - struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt; - struct nv04_surface *ns; - - ns = CALLOC_STRUCT(nv04_surface); - if (!ns) - return NULL; - pipe_texture_reference(&ns->base.texture, pt); - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = flags; - pipe_reference_init(&ns->base.reference, 1); - ns->base.face = face; - ns->base.level = level; - ns->base.zslice = zslice; - ns->pitch = nv10mt->level[level].pitch; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ns->base.offset = nv10mt->level[level].image_offset[face]; - } else { - ns->base.offset = nv10mt->level[level].image_offset[0]; - } - - return &ns->base; -} - -static void -nv10_miptree_surface_destroy(struct pipe_surface *surface) -{ -} - -void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen) -{ - pscreen->texture_create = nv10_miptree_create; - pscreen->texture_blanket = nv10_miptree_blanket; - pscreen->texture_destroy = nv10_miptree_destroy; - pscreen->get_tex_surface = nv10_miptree_surface_get; - pscreen->tex_surface_destroy = nv10_miptree_surface_destroy; -} - diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c 
b/src/gallium/drivers/nv10/nv10_prim_vbuf.c deleted file mode 100644 index c5dbe43dbc8..00000000000 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ /dev/null @@ -1,267 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * Build post-transformation, post-clipping vertex buffers and element - * lists by hooking into the end of the primitive pipeline and - * manipulating the vertex_id field in the vertex headers. 
- * - * XXX: work in progress - * - * \author José Fonseca <[email protected]> - * \author Keith Whitwell <[email protected]> - */ - - -#include "util/u_debug.h" -#include "pipe/p_inlines.h" - -#include "nv10_context.h" -#include "nv10_state.h" - -#include "draw/draw_vbuf.h" - -/** - * Primitive renderer for nv10. - */ -struct nv10_vbuf_render { - struct vbuf_render base; - - struct nv10_context *nv10; - - /** Vertex buffer */ - struct pipe_buffer* buffer; - - /** Vertex size in bytes */ - unsigned vertex_size; - - /** Hardware primitive */ - unsigned hwprim; -}; - - -void nv10_vtxbuf_bind( struct nv10_context* nv10 ) -{ - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - int i; - for(i = 0; i < 8; i++) { - BEGIN_RING(chan, celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(chan, 0/*nv10->vtxbuf*/); - BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(i), 1); - OUT_RING(chan, 0/*XXX*/); - } -} - -/** - * Basically a cast wrapper. 
- */ -static INLINE struct nv10_vbuf_render * -nv10_vbuf_render( struct vbuf_render *render ) -{ - assert(render); - return (struct nv10_vbuf_render *)render; -} - - -static const struct vertex_info * -nv10_vbuf_render_get_vertex_info( struct vbuf_render *render ) -{ - struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); - struct nv10_context *nv10 = nv10_render->nv10; - - nv10_emit_hw_state(nv10); - - return &nv10->vertex_info; -} - -static boolean -nv10_vbuf_render_allocate_vertices( struct vbuf_render *render, - ushort vertex_size, - ushort nr_vertices ) -{ - struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); - struct nv10_context *nv10 = nv10_render->nv10; - struct pipe_screen *screen = nv10->pipe.screen; - size_t size = (size_t)vertex_size * (size_t)nr_vertices; - - assert(!nv10_render->buffer); - nv10_render->buffer = screen->buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, size); - - nv10->dirty |= NV10_NEW_VTXARRAYS; - - if (nv10_render->buffer) - return FALSE; - return TRUE; -} - -static void * -nv10_vbuf_render_map_vertices( struct vbuf_render *render ) -{ - struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); - struct nv10_context *nv10 = nv10_render->nv10; - struct pipe_screen *pscreen = nv10->pipe.screen; - - return pipe_buffer_map(pscreen, nv10_render->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); -} - -static void -nv10_vbuf_render_unmap_vertices( struct vbuf_render *render, - ushort min_index, - ushort max_index ) -{ - struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); - struct nv10_context *nv10 = nv10_render->nv10; - struct pipe_screen *pscreen = nv10->pipe.screen; - - assert(!nv10_render->buffer); - pipe_buffer_unmap(pscreen, nv10_render->buffer); -} - -static boolean -nv10_vbuf_render_set_primitive( struct vbuf_render *render, - unsigned prim ) -{ - struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); - unsigned hwp = nvgl_primitive(prim); - if (hwp == 0) - return FALSE; - - 
nv10_render->hwprim = hwp; - return TRUE; -} - - -static void -nv10_vbuf_render_draw( struct vbuf_render *render, - const ushort *indices, - uint nr_indices) -{ - struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); - struct nv10_context *nv10 = nv10_render->nv10; - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - int push, i; - - nv10_emit_hw_state(nv10); - - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(chan, nouveau_bo(nv10_render->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(chan, nv10_render->hwprim); - - if (nr_indices & 1) { - BEGIN_RING(chan, celsius, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, indices[0]); - indices++; nr_indices--; - } - - while (nr_indices) { - // XXX too big/small ? check the size - push = MIN2(nr_indices, 1200 * 2); - - BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (indices[i+1] << 16) | indices[i]); - - nr_indices -= push; - indices += push; - } - - BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (chan, 0); -} - - -static void -nv10_vbuf_render_release_vertices( struct vbuf_render *render ) -{ - struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); - - assert(nv10_render->buffer); - pipe_buffer_reference(&nv10_render->buffer, NULL); -} - - -static void -nv10_vbuf_render_destroy( struct vbuf_render *render ) -{ - struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); - FREE(nv10_render); -} - - -/** - * Create a new primitive render. 
- */ -static struct vbuf_render * -nv10_vbuf_render_create( struct nv10_context *nv10 ) -{ - struct nv10_vbuf_render *nv10_render = CALLOC_STRUCT(nv10_vbuf_render); - - nv10_render->nv10 = nv10; - - nv10_render->base.max_vertex_buffer_bytes = 16*1024; - nv10_render->base.max_indices = 1024; - nv10_render->base.get_vertex_info = nv10_vbuf_render_get_vertex_info; - nv10_render->base.allocate_vertices = nv10_vbuf_render_allocate_vertices; - nv10_render->base.map_vertices = nv10_vbuf_render_map_vertices; - nv10_render->base.unmap_vertices = nv10_vbuf_render_unmap_vertices; - nv10_render->base.set_primitive = nv10_vbuf_render_set_primitive; - nv10_render->base.draw = nv10_vbuf_render_draw; - nv10_render->base.release_vertices = nv10_vbuf_render_release_vertices; - nv10_render->base.destroy = nv10_vbuf_render_destroy; - - return &nv10_render->base; -} - - -/** - * Create a new primitive vbuf/render stage. - */ -struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ) -{ - struct vbuf_render *render; - struct draw_stage *stage; - - render = nv10_vbuf_render_create(nv10); - if(!render) - return NULL; - - stage = draw_vbuf_stage( nv10->draw, render ); - if(!stage) { - render->destroy(render); - return NULL; - } - - return stage; -} diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c deleted file mode 100644 index 69a6dab866a..00000000000 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ /dev/null @@ -1,198 +0,0 @@ -#include "pipe/p_screen.h" - -#include "nv10_context.h" -#include "nv10_screen.h" - -static int -nv10_screen_get_param(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 2; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 0; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case 
PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 0; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 0; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 0; - case NOUVEAU_CAP_HW_VTXBUF: - case NOUVEAU_CAP_HW_IDXBUF: - return 0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv10_screen_get_paramf(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 2.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static boolean -nv10_screen_is_format_supported(struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, unsigned geom_flags) -{ - if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - return TRUE; - default: - break; - } - } else - if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - } else { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return TRUE; - default: - break; - } - } - - return FALSE; -} - -static void -nv10_screen_destroy(struct pipe_screen *pscreen) -{ - struct nv10_screen 
*screen = nv10_screen(pscreen); - - nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->celsius); - nv04_surface_2d_takedown(&screen->eng2d); - - nouveau_screen_fini(&screen->base); - - FREE(pscreen); -} - -static struct pipe_buffer * -nv10_surface_buffer(struct pipe_surface *surf) -{ - struct nv10_miptree *mt = (struct nv10_miptree *)surf->texture; - - return mt->buffer; -} - -struct pipe_screen * -nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) -{ - struct nv10_screen *screen = CALLOC_STRUCT(nv10_screen); - struct nouveau_channel *chan; - struct pipe_screen *pscreen; - unsigned celsius_class; - int ret; - - if (!screen) - return NULL; - pscreen = &screen->base.base; - - ret = nouveau_screen_init(&screen->base, dev); - if (ret) { - nv10_screen_destroy(pscreen); - return NULL; - } - chan = screen->base.channel; - - pscreen->winsys = ws; - pscreen->destroy = nv10_screen_destroy; - pscreen->get_param = nv10_screen_get_param; - pscreen->get_paramf = nv10_screen_get_paramf; - pscreen->is_format_supported = nv10_screen_is_format_supported; - - nv10_screen_init_miptree_functions(pscreen); - nv10_screen_init_transfer_functions(pscreen); - - /* 3D object */ - if (dev->chipset >= 0x20) - celsius_class = NV11TCL; - else if (dev->chipset >= 0x17) - celsius_class = NV17TCL; - else if (dev->chipset >= 0x11) - celsius_class = NV11TCL; - else - celsius_class = NV10TCL; - - if (!celsius_class) { - NOUVEAU_ERR("Unknown nv1x chipset: nv%02x\n", dev->chipset); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, 0xbeef0001, celsius_class, - &screen->celsius); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* 2D engine setup */ - screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nv10_surface_buffer; - - /* Notifier for sync purposes */ - ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - 
nv10_screen_destroy(pscreen); - return NULL; - } - - return pscreen; -} - diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h deleted file mode 100644 index 86b6d8def54..00000000000 --- a/src/gallium/drivers/nv10/nv10_screen.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __NV10_SCREEN_H__ -#define __NV10_SCREEN_H__ - -#include "nouveau/nouveau_screen.h" -#include "nv04/nv04_surface_2d.h" - -struct nv10_screen { - struct nouveau_screen base; - - struct nouveau_winsys *nvws; - - /* HW graphics objects */ - struct nv04_surface_2d *eng2d; - struct nouveau_grobj *celsius; - struct nouveau_notifier *sync; -}; - -static INLINE struct nv10_screen * -nv10_screen(struct pipe_screen *screen) -{ - return (struct nv10_screen *)screen; -} - - -void -nv10_screen_init_transfer_functions(struct pipe_screen *pscreen); - -#endif diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h deleted file mode 100644 index 2524ac02e29..00000000000 --- a/src/gallium/drivers/nv10/nv10_state.h +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef __NV10_STATE_H__ -#define __NV10_STATE_H__ - -#include "pipe/p_state.h" -#include "tgsi/tgsi_scan.h" - -struct nv10_blend_state { - uint32_t b_enable; - uint32_t b_srcfunc; - uint32_t b_dstfunc; - - uint32_t c_mask; - - uint32_t d_enable; -}; - -struct nv10_sampler_state { - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - -struct nv10_rasterizer_state { - uint32_t shade_model; - - uint32_t line_width; - uint32_t line_smooth_en; - - uint32_t point_size; - - uint32_t poly_smooth_en; - - uint32_t poly_mode_front; - uint32_t poly_mode_back; - - uint32_t front_face; - uint32_t cull_face; - uint32_t cull_face_en; - - uint32_t point_sprite; - - const struct pipe_rasterizer_state *templ; -}; - -struct nv10_vertex_program_exec { - uint32_t data[4]; - boolean has_branch_offset; - int const_index; -}; - -struct nv10_vertex_program_data { - int index; /* immediates == -1 */ - float 
value[4]; -}; - -struct nv10_vertex_program { - const struct pipe_shader_state *pipe; - - boolean translated; - struct nv10_vertex_program_exec *insns; - unsigned nr_insns; - struct nv10_vertex_program_data *consts; - unsigned nr_consts; - - struct nouveau_resource *exec; - unsigned exec_start; - struct nouveau_resource *data; - unsigned data_start; - unsigned data_start_min; - - uint32_t ir; - uint32_t or; -}; - -struct nv10_fragment_program_data { - unsigned offset; - unsigned index; -}; - -struct nv10_fragment_program { - struct pipe_shader_state pipe; - struct tgsi_shader_info info; - - boolean translated; - boolean on_hw; - unsigned samplers; - - uint32_t *insn; - int insn_len; - - struct nv10_fragment_program_data *consts; - unsigned nr_consts; - - struct pipe_buffer *buffer; - - uint32_t fp_control; - uint32_t fp_reg_control; -}; - - -struct nv10_depth_stencil_alpha_state { - struct { - uint32_t func; - uint32_t write_enable; - uint32_t test_enable; - } depth; - - struct { - uint32_t enable; - uint32_t wmask; - uint32_t func; - uint32_t ref; - uint32_t vmask; - uint32_t fail; - uint32_t zfail; - uint32_t zpass; - } stencil; - - struct { - uint32_t enabled; - uint32_t func; - uint32_t ref; - } alpha; -}; - -struct nv10_miptree { - struct pipe_texture base; - struct nouveau_bo *bo; - - struct pipe_buffer *buffer; - uint total_size; - - struct { - uint pitch; - uint *image_offset; - } level[PIPE_MAX_TEXTURE_LEVELS]; -}; - -#endif diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c deleted file mode 100644 index 30a596ca604..00000000000 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ /dev/null @@ -1,333 +0,0 @@ -#include "nv10_context.h" -#include "nv10_state.h" - -static void nv10_state_emit_blend(struct nv10_context* nv10) -{ - struct nv10_blend_state *b = nv10->blend; - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = 
screen->celsius; - - BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 1); - OUT_RING (chan, b->d_enable); - - BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); - OUT_RING (chan, b->b_enable); - OUT_RING (chan, b->b_srcfunc); - OUT_RING (chan, b->b_dstfunc); - - BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (chan, b->c_mask); -} - -static void nv10_state_emit_blend_color(struct nv10_context* nv10) -{ - struct pipe_blend_color *c = nv10->blend_color; - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - - BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1); - OUT_RING (chan, - (float_to_ubyte(c->color[3]) << 24)| - (float_to_ubyte(c->color[0]) << 16)| - (float_to_ubyte(c->color[1]) << 8) | - (float_to_ubyte(c->color[2]) << 0)); -} - -static void nv10_state_emit_rast(struct nv10_context* nv10) -{ - struct nv10_rasterizer_state *r = nv10->rast; - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - - BEGIN_RING(chan, celsius, NV10TCL_SHADE_MODEL, 2); - OUT_RING (chan, r->shade_model); - OUT_RING (chan, r->line_width); - - - BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (chan, r->point_size); - - BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (chan, r->poly_mode_front); - OUT_RING (chan, r->poly_mode_back); - - - BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (chan, r->cull_face); - OUT_RING (chan, r->front_face); - - BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (chan, r->line_smooth_en); - OUT_RING (chan, r->poly_smooth_en); - - BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (chan, r->cull_face_en); -} - -static void nv10_state_emit_dsa(struct nv10_context* nv10) -{ - struct nv10_depth_stencil_alpha_state *d = nv10->dsa; - struct nv10_screen *screen = 
nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - - BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (chan, d->depth.func); - - BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (chan, d->depth.write_enable); - - BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (chan, d->depth.test_enable); - -#if 0 - BEGIN_RING(chan, celsius, NV10TCL_STENCIL_ENABLE, 1); - OUT_RING (chan, d->stencil.enable); - BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 7); - OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); -#endif - - BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (chan, d->alpha.enabled); - - BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (chan, d->alpha.func); - - BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_REF, 1); - OUT_RING (chan, d->alpha.ref); -} - -static void nv10_state_emit_viewport(struct nv10_context* nv10) -{ -} - -static void nv10_state_emit_scissor(struct nv10_context* nv10) -{ - // XXX this is so not working -/* struct pipe_scissor_state *s = nv10->scissor; - BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ -} - -static void nv10_state_emit_framebuffer(struct nv10_context* nv10) -{ - struct pipe_framebuffer_state* fb = nv10->framebuffer; - struct nv04_surface *rt, *zeta = NULL; - uint32_t rt_format, w, h; - int colour_format = 0, zeta_format = 0; - struct nv10_miptree *nv10mt = 0; - - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - - w = fb->cbufs[0]->width; - h = fb->cbufs[0]->height; - colour_format = fb->cbufs[0]->format; - rt = (struct nv04_surface *)fb->cbufs[0]; - - if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = 
fb->zsbuf->width; - h = fb->zsbuf->height; - } - - zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; - } - - rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR; - - switch (colour_format) { - case PIPE_FORMAT_X8R8G8B8_UNORM: - rt_format |= NV10TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - if (zeta) { - BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); - } else { - BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (chan, rt->pitch | (rt->pitch << 16)); - } - - nv10mt = (struct nv10_miptree *)rt->base.texture; - nv10->rt[0] = nv10mt->buffer; - - if (zeta_format) - { - nv10mt = (struct nv10_miptree *)zeta->base.texture; - nv10->zeta = nv10mt->buffer; - } - - BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 3); - OUT_RING (chan, (w << 16) | 0); - OUT_RING (chan, (h << 16) | 0); - OUT_RING (chan, rt_format); - BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (chan, ((w - 1) << 16) | 0 | 0x08000800); - OUT_RING (chan, ((h - 1) << 16) | 0 | 0x08000800); -} - -static void nv10_vertex_layout(struct nv10_context *nv10) -{ - struct nv10_fragment_program *fp = nv10->fragprog.current; - uint32_t src = 0; - int i; - struct vertex_info vinfo; - - memset(&vinfo, 0, sizeof(vinfo)); - - for (i = 0; i < fp->info.num_inputs; i++) { - switch (fp->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); - break; - case TGSI_SEMANTIC_COLOR: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); - break; - default: - case TGSI_SEMANTIC_GENERIC: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); - break; - case TGSI_SEMANTIC_FOG: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, 
src++); - break; - } - } - draw_compute_vertex_size(&vinfo); -} - -void -nv10_emit_hw_state(struct nv10_context *nv10) -{ - struct nv10_screen *screen = nv10->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *celsius = screen->celsius; - struct nouveau_bo *rt_bo; - int i; - - if (nv10->dirty & NV10_NEW_VERTPROG) { - //nv10_vertprog_bind(nv10, nv10->vertprog.current); - nv10->dirty &= ~NV10_NEW_VERTPROG; - } - - if (nv10->dirty & NV10_NEW_FRAGPROG) { - nv10_fragprog_bind(nv10, nv10->fragprog.current); - /*XXX: clear NV10_NEW_FRAGPROG if no new program uploaded */ - nv10->dirty_samplers |= (1<<10); - nv10->dirty_samplers = 0; - } - - if (nv10->dirty_samplers || (nv10->dirty & NV10_NEW_FRAGPROG)) { - nv10_fragtex_bind(nv10); - nv10->dirty &= ~NV10_NEW_FRAGPROG; - } - - if (nv10->dirty & NV10_NEW_VTXARRAYS) { - nv10->dirty &= ~NV10_NEW_VTXARRAYS; - nv10_vertex_layout(nv10); - nv10_vtxbuf_bind(nv10); - } - - if (nv10->dirty & NV10_NEW_BLEND) { - nv10->dirty &= ~NV10_NEW_BLEND; - nv10_state_emit_blend(nv10); - } - - if (nv10->dirty & NV10_NEW_BLENDCOL) { - nv10->dirty &= ~NV10_NEW_BLENDCOL; - nv10_state_emit_blend_color(nv10); - } - - if (nv10->dirty & NV10_NEW_RAST) { - nv10->dirty &= ~NV10_NEW_RAST; - nv10_state_emit_rast(nv10); - } - - if (nv10->dirty & NV10_NEW_DSA) { - nv10->dirty &= ~NV10_NEW_DSA; - nv10_state_emit_dsa(nv10); - } - - if (nv10->dirty & NV10_NEW_VIEWPORT) { - nv10->dirty &= ~NV10_NEW_VIEWPORT; - nv10_state_emit_viewport(nv10); - } - - if (nv10->dirty & NV10_NEW_SCISSOR) { - nv10->dirty &= ~NV10_NEW_SCISSOR; - nv10_state_emit_scissor(nv10); - } - - if (nv10->dirty & NV10_NEW_FRAMEBUFFER) { - nv10->dirty &= ~NV10_NEW_FRAMEBUFFER; - nv10_state_emit_framebuffer(nv10); - } - - /* Emit relocs for every referenced buffer. - * This is to ensure the bufmgr has an accurate idea of how - * the buffer is used. This isn't very efficient, but we don't - * seem to take a significant performance hit. 
Will be improved - * at some point. Vertex arrays are emitted by nv10_vbo.c - */ - - /* Render target */ - rt_bo = nouveau_bo(nv10->rt[0]); -// XXX figre out who's who for NV10TCL_DMA_* and fill accordingly -// BEGIN_RING(chan, celsius, NV10TCL_DMA_COLOR0, 1); -// OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - if (nv10->zeta) { - struct nouveau_bo *zeta_bo = nouveau_bo(nv10->zeta); -// XXX -// BEGIN_RING(chan, celsius, NV10TCL_DMA_ZETA, 1); -// OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, celsius, NV10TCL_ZETA_OFFSET, 1); - OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(chan, celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(chan, nouveau_bo(nv10->zeta + lma_offset));*/ - } - - /* Vertex buffer */ - BEGIN_RING(chan, celsius, NV10TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - /* Texture images */ - for (i = 0; i < 2; i++) { - if (!(nv10->fp_samplers & (1 << i))) - continue; - struct nouveau_bo *bo = nouveau_bo(nv10->tex[i].buffer); - BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(i), 1); - OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(chan, celsius, NV10TCL_TX_FORMAT(i), 1); - OUT_RELOCd(chan, bo, nv10->tex[i].format, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | - NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, - NV10TCL_TX_FORMAT_DMA1); - } -} - diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c deleted file mode 100644 index 5b52246a9ca..00000000000 --- a/src/gallium/drivers/nv10/nv10_surface.c +++ /dev/null @@ -1,63 +0,0 @@ - 
-/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "nv10_context.h" -#include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" -#include "util/u_tile.h" - -static void -nv10_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, - struct pipe_surface *src, unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - struct nv10_context *nv10 = nv10_context(pipe); - struct nv04_surface_2d *eng2d = nv10->screen->eng2d; - - eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); -} - -static void -nv10_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nv10_context *nv10 = nv10_context(pipe); - struct nv04_surface_2d *eng2d = nv10->screen->eng2d; - - eng2d->fill(eng2d, dest, destx, desty, width, height, value); -} - -void -nv10_init_surface_functions(struct nv10_context *nv10) -{ - nv10->pipe.surface_copy = nv10_surface_copy; - nv10->pipe.surface_fill = nv10_surface_fill; -} diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c deleted file mode 100644 index eb04af9782e..00000000000 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ /dev/null @@ -1,178 +0,0 @@ -#include <pipe/p_state.h> -#include <pipe/p_defines.h> -#include <pipe/p_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_math.h> -#include <nouveau/nouveau_winsys.h> -#include "nv10_context.h" -#include "nv10_screen.h" -#include "nv10_state.h" - -struct nv10_transfer { - struct pipe_transfer base; - struct pipe_surface *surface; - boolean direct; -}; - -static void -nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, - struct pipe_texture *template) -{ - memset(template, 0, sizeof(struct pipe_texture)); - template->target = 
pt->target; - template->format = pt->format; - template->width0 = width; - template->height0 = height; - template->depth0 = 1; - template->last_level = 0; - template->nr_samples = pt->nr_samples; - - template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | - NOUVEAU_TEXTURE_USAGE_LINEAR; -} - -static struct pipe_transfer * -nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct nv10_miptree *mt = (struct nv10_miptree *)pt; - struct nv10_transfer *tx; - struct pipe_texture tx_tex_template, *tx_tex; - - tx = CALLOC_STRUCT(nv10_transfer); - if (!tx) - return NULL; - - pipe_texture_reference(&tx->base.texture, pt); - tx->base.x = x; - tx->base.y = y; - tx->base.width = w; - tx->base.height = h; - tx->base.stride = mt->level[level].pitch; - tx->base.usage = usage; - tx->base.face = face; - tx->base.level = level; - tx->base.zslice = zslice; - - /* Direct access to texture */ - if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || - debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) && - pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) - { - tx->direct = true; - tx->surface = pscreen->get_tex_surface(pscreen, pt, - 0, 0, 0, - pipe_transfer_buffer_flags(&tx->base)); - return &tx->base; - } - - tx->direct = false; - - nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template); - - tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); - if (!tx_tex) - { - FREE(tx); - return NULL; - } - - tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch; - - tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, - face, level, zslice, - pipe_transfer_buffer_flags(&tx->base)); - - pipe_texture_reference(&tx_tex, NULL); - - if (!tx->surface) - { - pipe_surface_reference(&tx->surface, NULL); - FREE(tx); - return NULL; - } - - if (usage & PIPE_TRANSFER_READ) { - struct nv10_screen *nvscreen = 
nv10_screen(pscreen); - struct pipe_surface *src; - - src = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - PIPE_BUFFER_USAGE_GPU_READ); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - /* TODO: Check if SIFM can un-swizzle */ - nvscreen->eng2d->copy(nvscreen->eng2d, - tx->surface, 0, 0, - src, x, y, - w, h); - - pipe_surface_reference(&src, NULL); - } - - return &tx->base; -} - -static void -nv10_transfer_del(struct pipe_transfer *ptx) -{ - struct nv10_transfer *tx = (struct nv10_transfer *)ptx; - - if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = ptx->texture->screen; - struct nv10_screen *nvscreen = nv10_screen(pscreen); - struct pipe_surface *dst; - - dst = pscreen->get_tex_surface(pscreen, ptx->texture, - ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - nvscreen->eng2d->copy(nvscreen->eng2d, - dst, tx->base.x, tx->base.y, - tx->surface, 0, 0, - tx->base.width, tx->base.height); - - pipe_surface_reference(&dst, NULL); - } - - pipe_surface_reference(&tx->surface, NULL); - pipe_texture_reference(&ptx->texture, NULL); - FREE(ptx); -} - -static void * -nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv10_transfer *tx = (struct nv10_transfer *)ptx; - struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nv10_miptree *mt = (struct nv10_miptree *)tx->surface->texture; - void *map = pipe_buffer_map(pscreen, mt->buffer, - pipe_transfer_buffer_flags(ptx)); - - if(!tx->direct) - return map + ns->base.offset; - else - return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); -} - -static void -nv10_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv10_transfer *tx = (struct nv10_transfer *)ptx; - struct nv10_miptree *mt = (struct nv10_miptree *)tx->surface->texture; - - 
pipe_buffer_unmap(pscreen, mt->buffer); -} - -void -nv10_screen_init_transfer_functions(struct pipe_screen *pscreen) -{ - pscreen->get_tex_transfer = nv10_transfer_new; - pscreen->tex_transfer_destroy = nv10_transfer_del; - pscreen->transfer_map = nv10_transfer_map; - pscreen->transfer_unmap = nv10_transfer_unmap; -} diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c deleted file mode 100644 index 9180c72c9b0..00000000000 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ /dev/null @@ -1,77 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "pipe/p_inlines.h" - -#include "nv10_context.h" -#include "nv10_state.h" - -#include "nouveau/nouveau_channel.h" -#include "nouveau/nouveau_pushbuf.h" - -void nv10_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) -{ - struct nv10_context *nv10 = nv10_context( pipe ); - struct draw_context *draw = nv10->draw; - struct pipe_screen *pscreen = pipe->screen; - unsigned i; - - nv10_emit_hw_state(nv10); - - /* - * Map vertex buffers - */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (nv10->vtxbuf[i].buffer) { - void *buf = - pipe_buffer_map(pscreen, nv10->vtxbuf[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(draw, i, buf); - } - } - /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pscreen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, NULL); - } - - draw_set_mapped_constant_buffer(draw, - PIPE_SHADER_VERTEX, - nv10->constbuf[PIPE_SHADER_VERTEX], - nv10->constbuf_nr[PIPE_SHADER_VERTEX]); - - /* draw! 
*/ - draw_arrays(nv10->draw, prim, start, count); - - /* - * unmap vertex/index buffers - */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (nv10->vtxbuf[i].buffer) { - pipe_buffer_unmap(pscreen, nv10->vtxbuf[i].buffer); - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - } - if (indexBuffer) { - pipe_buffer_unmap(pscreen, indexBuffer); - draw_set_mapped_element_buffer(draw, 0, NULL); - } -} - -void nv10_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) -{ - nv10_draw_elements(pipe, NULL, 0, prim, start, count); -} - - - diff --git a/src/gallium/drivers/nv20/Makefile b/src/gallium/drivers/nv20/Makefile deleted file mode 100644 index 1305f26c591..00000000000 --- a/src/gallium/drivers/nv20/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv20 - -C_SOURCES = \ - nv20_clear.c \ - nv20_context.c \ - nv20_fragprog.c \ - nv20_fragtex.c \ - nv20_miptree.c \ - nv20_prim_vbuf.c \ - nv20_screen.c \ - nv20_state.c \ - nv20_state_emit.c \ - nv20_surface.c \ - nv20_transfer.c \ - nv20_vbo.c -# nv20_vertprog.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/nv20/nv20_clear.c b/src/gallium/drivers/nv20/nv20_clear.c deleted file mode 100644 index 2b4490fa5e1..00000000000 --- a/src/gallium/drivers/nv20/nv20_clear.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_clear.h" - -#include "nv20_context.h" - -void -nv20_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - util_clear(pipe, nv20_context(pipe)->framebuffer, buffers, rgba, depth, - stencil); -} diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c deleted file mode 100644 index 5b80af2d22a..00000000000 --- a/src/gallium/drivers/nv20/nv20_context.c +++ /dev/null @@ -1,424 +0,0 @@ -#include "draw/draw_context.h" -#include 
"pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" - -#include "nv20_context.h" -#include "nv20_screen.h" - -static void -nv20_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct nv20_context *nv20 = nv20_context(pipe); - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - - draw_flush(nv20->draw); - - FIRE_RING(chan); - if (fence) - *fence = NULL; -} - -static void -nv20_destroy(struct pipe_context *pipe) -{ - struct nv20_context *nv20 = nv20_context(pipe); - - if (nv20->draw) - draw_destroy(nv20->draw); - - FREE(nv20); -} - -static void nv20_init_hwctx(struct nv20_context *nv20) -{ - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - int i; - float projectionmatrix[16]; - const boolean is_nv25tcl = (kelvin->grclass == NV25TCL); - - BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1); - OUT_RING (chan, screen->sync->handle); - BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->gart->handle); /* TEXTURE1 */ - BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->vram->handle); /* ZETA */ - - BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1); - OUT_RING (chan, 0); /* renouveau: beef0351, unique */ - - BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - - BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING (chan, (0xfff << 16) | 0x0); - BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING (chan, (0xfff << 16) | 0x0); - - for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) { - BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (chan, 0); - } - - BEGIN_RING(chan, kelvin, 
NV20TCL_VIEWPORT_CLIP_MODE, 1); - OUT_RING (chan, 0); - - BEGIN_RING(chan, kelvin, 0x17e0, 3); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 1.0); - - if (is_nv25tcl) { - BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); - } else { - BEGIN_RING(chan, kelvin, 0x1e68, 1); - OUT_RING (chan, 0x4b800000); /* 16777216.000000 */ - BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL); - } - - BEGIN_RING(chan, kelvin, 0x290, 1); - OUT_RING (chan, (0x10 << 16) | 1); - BEGIN_RING(chan, kelvin, 0x9fc, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, 0x1d80, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, kelvin, 0x9f8, 1); - OUT_RING (chan, 4); - BEGIN_RING(chan, kelvin, 0x17ec, 3); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 1.0); - OUT_RINGf (chan, 0.0); - - if (is_nv25tcl) { - BEGIN_RING(chan, kelvin, 0x1d88, 1); - OUT_RING (chan, 3); - - BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1); - OUT_RING (chan, chan->vram->handle); - BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1); - OUT_RING (chan, chan->vram->handle); - } - BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1); - OUT_RING (chan, 0); /* renouveau: beef1e10 */ - - BEGIN_RING(chan, kelvin, 0x1e98, 1); - OUT_RING (chan, 0); -#if 0 - if (is_nv25tcl) { - BEGIN_RING(chan, NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); - OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ - OUT_RING (chan, NvDmaFB); /* renouveau: beef0201 */ - - BEGIN_RING(chan, NvSub3D, NV20TCL_DMA_TEXTURE1, 1); - OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ - } -#endif - BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1); - OUT_RING (chan, 0); - - BEGIN_RING(chan, kelvin, 0x120, 3); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 2); - -/* error: ILLEGAL_MTHD, PROTECTION_FAULT - BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 512.0); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 0.0); 
-*/ - - if (is_nv25tcl) { - BEGIN_RING(chan, kelvin, 0x022c, 2); - OUT_RING (chan, 0x280); - OUT_RING (chan, 0x07d28000); - } - -/* * illegal method, protection fault - BEGIN_RING(chan, NvSub3D, 0x1c2c, 1); - OUT_RING (chan, 0); */ - - if (is_nv25tcl) { - BEGIN_RING(chan, kelvin, 0x1da4, 1); - OUT_RING (chan, 0); - } - -/* * crashes with illegal method, protection fault - BEGIN_RING(chan, NvSub3D, 0x1c18, 1); - OUT_RING (chan, 0x200); */ - - BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING (chan, (0 << 16) | 0); - OUT_RING (chan, (0 << 16) | 0); - - /* *** Set state *** */ - - BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); - OUT_RING (chan, 0); /* NV20TCL_ALPHA_FUNC_REF */ - - for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) { - BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1); - OUT_RING (chan, 0); - } - BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4); - OUT_RING (chan, 0x30d410d0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4); - OUT_RING (chan, 0x00000c00); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1); - OUT_RING (chan, 0x00011101); - BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2); - OUT_RING (chan, 0x130e0300); - OUT_RING (chan, 0x0c091c80); - BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); - OUT_RING (chan, 0x00000c00); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4); - OUT_RING (chan, 0x20c400c0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2); - OUT_RING (chan, 0); - OUT_RING 
(chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); - OUT_RING (chan, 0x035125a0); - OUT_RING (chan, 0); - OUT_RING (chan, 0x40002000); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); - OUT_RING (chan, 0xffff0000); - - BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4); - OUT_RING (chan, NV20TCL_BLEND_FUNC_SRC_ONE); - OUT_RING (chan, NV20TCL_BLEND_FUNC_DST_ZERO); - OUT_RING (chan, 0); /* NV20TCL_BLEND_COLOR */ - OUT_RING (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD); - BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RING (chan, 0xff); - OUT_RING (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); - OUT_RING (chan, 0); /* NV20TCL_STENCIL_FUNC_REF */ - OUT_RING (chan, 0xff); /* NV20TCL_STENCIL_FUNC_MASK */ - OUT_RING (chan, NV20TCL_STENCIL_OP_FAIL_KEEP); - OUT_RING (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP); - OUT_RING (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP); - - BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (chan, 0); - OUT_RING (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY); - BEGIN_RING(chan, kelvin, 0x17cc, 1); - OUT_RING (chan, 0); - if (is_nv25tcl) { - BEGIN_RING(chan, kelvin, 0x1d84, 1); - OUT_RING (chan, 1); - } - BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_CONTROL, 1); - OUT_RING (chan, 0x00020000); - BEGIN_RING(chan, kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), - 
NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE); - for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) { - OUT_RING(chan, 0xffffffff); - } - - BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (chan, 0); - OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ - OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ - BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (chan, NV20TCL_DEPTH_FUNC_LESS); - BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ - BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (chan, 1); - if (!is_nv25tcl) { - BEGIN_RING(chan, kelvin, 0x1d84, 1); - OUT_RING (chan, 3); - } - BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); - if (!is_nv25tcl) { - OUT_RING (chan, 8); - } else { - OUT_RINGf (chan, 1.0); - } - if (!is_nv25tcl) { - BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (chan, 0); - OUT_RING (chan, 0); /* NV20TCL.POINT_SMOOTH_ENABLE */ - } else { - BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, 0x0a1c, 1); - OUT_RING (chan, 0x800); - } - BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1); - OUT_RING (chan, 8); - BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (chan, NV20TCL_POLYGON_MODE_FRONT_FILL); - OUT_RING (chan, NV20TCL_POLYGON_MODE_BACK_FILL); - BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (chan, NV20TCL_CULL_FACE_BACK); - OUT_RING (chan, NV20TCL_FRONT_FACE_CCW); - BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (chan, 0); 
- BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1); - OUT_RING (chan, NV20TCL_SHADE_MODEL_SMOOTH); - BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); - for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) { - OUT_RING(chan, 0); - } - BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RINGf (chan, 1.5); - OUT_RINGf (chan, -0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ - OUT_RINGf (chan, 0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ - BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2); - OUT_RING (chan, NV20TCL_FOG_MODE_EXP_SIGNED); - OUT_RING (chan, NV20TCL_FOG_COORD_FOG); - BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2); - OUT_RING (chan, 0); - OUT_RING (chan, 0); /* NV20TCL.FOG_COLOR */ - BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1); - OUT_RING (chan, NV20TCL_ENGINE_FIXED); - - for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) { - BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); - OUT_RING (chan, 0); - } - - BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); - OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); - OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); - OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); - for (i = 4; i < 16; ++i) { - OUT_RINGf(chan, 0.0); - OUT_RINGf(chan, 0.0); - OUT_RINGf(chan, 0.0); - OUT_RINGf(chan, 1.0); - } - - BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (chan, 0x00010101); - BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1); - OUT_RING (chan, 0); - - memset(projectionmatrix, 0, sizeof(projectionmatrix)); - projectionmatrix[0*4+0] = 1.0; - projectionmatrix[1*4+1] = 1.0; - projectionmatrix[2*4+2] = 16777215.0; - projectionmatrix[3*4+3] = 1.0; - BEGIN_RING(chan, kelvin, NV20TCL_PROJECTION_MATRIX(0), 
16); - for (i = 0; i < 16; i++) { - OUT_RINGf (chan, projectionmatrix[i]); - } - - BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ - - BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (chan, 0.0); /* x-offset, w/2 + 1.031250 */ - OUT_RINGf (chan, 0.0); /* y-offset, h/2 + 0.030762 */ - OUT_RINGf (chan, 0.0); - OUT_RINGf (chan, 16777215.0); - - BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); - OUT_RINGf (chan, 0.0); /* no effect?, w/2 */ - OUT_RINGf (chan, 0.0); /* no effect?, h/2 */ - OUT_RINGf (chan, 16777215.0 * 0.5); - OUT_RINGf (chan, 65535.0); - - FIRE_RING (chan); -} - -struct pipe_context * -nv20_create(struct pipe_screen *pscreen, unsigned pctx_id) -{ - struct nv20_screen *screen = nv20_screen(pscreen); - struct pipe_winsys *ws = pscreen->winsys; - struct nv20_context *nv20; - struct nouveau_winsys *nvws = screen->nvws; - - nv20 = CALLOC(1, sizeof(struct nv20_context)); - if (!nv20) - return NULL; - nv20->screen = screen; - nv20->pctx_id = pctx_id; - - nv20->nvws = nvws; - - nv20->pipe.winsys = ws; - nv20->pipe.screen = pscreen; - nv20->pipe.destroy = nv20_destroy; - nv20->pipe.draw_arrays = nv20_draw_arrays; - nv20->pipe.draw_elements = nv20_draw_elements; - nv20->pipe.clear = nv20_clear; - nv20->pipe.flush = nv20_flush; - - nv20->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nv20->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - - nv20_init_surface_functions(nv20); - nv20_init_state_functions(nv20); - - nv20->draw = draw_create(); - assert(nv20->draw); - draw_set_rasterize_stage(nv20->draw, nv20_draw_vbuf_stage(nv20)); - - nv20_init_hwctx(nv20); - - return &nv20->pipe; -} - diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h deleted file mode 100644 index c7dfadaa311..00000000000 --- a/src/gallium/drivers/nv20/nv20_context.h +++ /dev/null @@ -1,150 +0,0 @@ 
-#ifndef __NV20_CONTEXT_H__ -#define __NV20_CONTEXT_H__ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_compiler.h" - -#include "util/u_memory.h" -#include "util/u_math.h" - -#include "draw/draw_vertex.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_gldefs.h" -#include "nouveau/nouveau_context.h" - -#include "nv20_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -#define NV20_NEW_VERTPROG (1 << 0) -#define NV20_NEW_FRAGPROG (1 << 1) -#define NV20_NEW_VTXARRAYS (1 << 2) -#define NV20_NEW_BLEND (1 << 3) -#define NV20_NEW_BLENDCOL (1 << 4) -#define NV20_NEW_RAST (1 << 5) -#define NV20_NEW_DSA (1 << 6) -#define NV20_NEW_VIEWPORT (1 << 7) -#define NV20_NEW_SCISSOR (1 << 8) -#define NV20_NEW_FRAMEBUFFER (1 << 9) - -#include "nv20_screen.h" - -struct nv20_context { - struct pipe_context pipe; - - struct nouveau_winsys *nvws; - struct nv20_screen *screen; - unsigned pctx_id; - - struct draw_context *draw; - - uint32_t dirty; - - struct nv20_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; - struct nv20_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned dirty_samplers; - unsigned fp_samplers; - unsigned vp_samplers; - - uint32_t rt_enable; - struct pipe_buffer *rt[4]; - struct pipe_buffer *zeta; - uint32_t lma_offset; - - struct nv20_blend_state *blend; - struct pipe_blend_color *blend_color; - struct nv20_rasterizer_state *rast; - struct nv20_depth_stencil_alpha_state *dsa; - struct pipe_viewport_state *viewport; - struct pipe_scissor_state *scissor; - struct pipe_framebuffer_state *framebuffer; - - //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; - float *constbuf[PIPE_SHADER_TYPES][32][4]; - unsigned constbuf_nr[PIPE_SHADER_TYPES]; - - struct vertex_info vertex_info; - - struct { - struct pipe_buffer *buffer; - uint32_t format; - } tex[2]; - - unsigned 
vb_enable; - struct { - struct pipe_buffer *buffer; - unsigned delta; - } vb[16]; - -/* struct { - - struct nouveau_resource *exec_heap; - struct nouveau_resource *data_heap; - - struct nv20_vertex_program *active; - - struct nv20_vertex_program *current; - } vertprog; -*/ - struct { - struct nv20_fragment_program *active; - - struct nv20_fragment_program *current; - struct pipe_buffer *constant_buf; - } fragprog; - - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; -}; - -static INLINE struct nv20_context * -nv20_context(struct pipe_context *pipe) -{ - return (struct nv20_context *)pipe; -} - -extern void nv20_init_state_functions(struct nv20_context *nv20); -extern void nv20_init_surface_functions(struct nv20_context *nv20); - -extern void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen); - -/* nv20_clear.c */ -extern void nv20_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil); - -/* nv20_draw.c */ -extern struct draw_stage *nv20_draw_render_stage(struct nv20_context *nv20); - -/* nv20_fragprog.c */ -extern void nv20_fragprog_bind(struct nv20_context *, - struct nv20_fragment_program *); -extern void nv20_fragprog_destroy(struct nv20_context *, - struct nv20_fragment_program *); - -/* nv20_fragtex.c */ -extern void nv20_fragtex_bind(struct nv20_context *); - -/* nv20_prim_vbuf.c */ -struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 ); -extern void nv20_vtxbuf_bind(struct nv20_context* nv20); - -/* nv20_state.c and friends */ -extern void nv20_emit_hw_state(struct nv20_context *nv20); -extern void nv20_state_tex_update(struct nv20_context *nv20); - -/* nv20_vbo.c */ -extern void nv20_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv20_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned 
count); - - -#endif diff --git a/src/gallium/drivers/nv20/nv20_fragprog.c b/src/gallium/drivers/nv20/nv20_fragprog.c deleted file mode 100644 index 4f496369dd3..00000000000 --- a/src/gallium/drivers/nv20/nv20_fragprog.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "nv20_context.h" - -void -nv20_fragprog_bind(struct nv20_context *nv20, struct nv20_fragment_program *fp) -{ -} - -void -nv20_fragprog_destroy(struct nv20_context *nv20, - struct nv20_fragment_program *fp) -{ -} - diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c deleted file mode 100644 index dedbec73f39..00000000000 --- a/src/gallium/drivers/nv20/nv20_fragtex.c +++ /dev/null @@ -1,130 +0,0 @@ -#include "nv20_context.h" -#include "nouveau/nouveau_util.h" - -#define _(m,tf) \ -{ \ - TRUE, \ - PIPE_FORMAT_##m, \ - NV20TCL_TX_FORMAT_FORMAT_##tf, \ -} - -struct nv20_texture_format { - boolean defined; - uint pipe; - int format; -}; - -static struct nv20_texture_format -nv20_texture_formats[] = { - _(A8R8G8B8_UNORM, A8R8G8B8), - _(A1R5G5B5_UNORM, A1R5G5B5), - _(A4R4G4B4_UNORM, A4R4G4B4), - _(L8_UNORM , L8 ), - _(A8_UNORM , A8 ), - _(A8L8_UNORM , A8L8 ), -/* _(RGB_DXT1 , DXT1, ), */ -/* _(RGBA_DXT1 , DXT1, ), */ -/* _(RGBA_DXT3 , DXT3, ), */ -/* _(RGBA_DXT5 , DXT5, ), */ - {}, -}; - -static struct nv20_texture_format * -nv20_fragtex_format(uint pipe_format) -{ - struct nv20_texture_format *tf = nv20_texture_formats; - - while (tf->defined) { - if (tf->pipe == pipe_format) - return tf; - tf++; - } - - return NULL; -} - - -static void -nv20_fragtex_build(struct nv20_context *nv20, int unit) -{ -#if 0 - struct nv20_sampler_state *ps = nv20->tex_sampler[unit]; - struct nv20_miptree *nv20mt = nv20->tex_miptree[unit]; - struct pipe_texture *pt = &nv20mt->base; - struct nv20_texture_format *tf; - 
struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - uint32_t txf, txs, txp; - - tf = nv20_fragtex_format(pt->format); - if (!tf || !tf->defined) { - NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); - return; - } - - txf = tf->format << 8; - txf |= (pt->last_level + 1) << 16; - txf |= log2i(pt->width0) << 20; - txf |= log2i(pt->height0) << 24; - txf |= log2i(pt->depth0) << 28; - txf |= 8; - - switch (pt->target) { - case PIPE_TEXTURE_CUBE: - txf |= NV10TCL_TX_FORMAT_CUBE_MAP; - /* fall-through */ - case PIPE_TEXTURE_2D: - txf |= (2<<4); - break; - case PIPE_TEXTURE_1D: - txf |= (1<<4); - break; - default: - NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; - } - - BEGIN_RING(chan, kelvin, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(chan, nouveau_bo(nv20mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(chan, nouveau_bo(nv20mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (chan, ps->wrap); - OUT_RING (chan, 0x40000000); /* enable */ - OUT_RING (chan, txs); - OUT_RING (chan, ps->filt | 0x2000 /* magic */); - OUT_RING (chan, (pt->width0 << 16) | pt->height0); - OUT_RING (chan, ps->bcol); -#endif -} - -void -nv20_fragtex_bind(struct nv20_context *nv20) -{ -#if 0 - struct nv20_fragment_program *fp = nv20->fragprog.active; - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - unsigned samplers, unit; - - samplers = nv20->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - BEGIN_RING(chan, kelvin, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (chan, 0); - } - - samplers = nv20->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - nv20_fragtex_build(nv20, unit); 
- } - - nv20->fp_samplers = fp->samplers; -#endif -} - diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c deleted file mode 100644 index 8f7538e7f57..00000000000 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ /dev/null @@ -1,226 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "util/u_format.h" -#include "util/u_math.h" - -#include "nv20_context.h" -#include "nv20_screen.h" -#include "../nv04/nv04_surface_2d.h" - -static void -nv20_miptree_layout(struct nv20_miptree *nv20mt) -{ - struct pipe_texture *pt = &nv20mt->base; - uint width = pt->width0; - uint offset = 0; - int nr_faces, l, f; - uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL | - PIPE_TEXTURE_USAGE_RENDER_TARGET | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY); - - if (pt->target == PIPE_TEXTURE_CUBE) { - nr_faces = 6; - } else { - nr_faces = 1; - } - - for (l = 0; l <= pt->last_level; l++) { - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv20mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); - else - nv20mt->level[l].pitch = util_format_get_stride(pt->format, width); - - nv20mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); - - width = u_minify(width, 1); - } - - for (f = 0; f < nr_faces; f++) { - for (l = 0; l < pt->last_level; l++) { - nv20mt->level[l].image_offset[f] = offset; - - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) - offset += align(nv20mt->level[l].pitch * u_minify(pt->height0, l), 64); - else - offset += nv20mt->level[l].pitch * u_minify(pt->height0, l); - } - - nv20mt->level[l].image_offset[f] = offset; - offset += nv20mt->level[l].pitch * u_minify(pt->height0, l); - } - - nv20mt->total_size = offset; -} - -static struct pipe_texture * -nv20_miptree_blanket(struct 
pipe_screen *pscreen, const struct pipe_texture *pt, - const unsigned *stride, struct pipe_buffer *pb) -{ - struct nv20_miptree *mt; - - /* Only supports 2D, non-mipmapped textures for the moment */ - if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth0 != 1) - return NULL; - - mt = CALLOC_STRUCT(nv20_miptree); - if (!mt) - return NULL; - - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - mt->level[0].pitch = stride[0]; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); - - pipe_buffer_reference(&mt->buffer, pb); - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; -} - -static struct pipe_texture * -nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) -{ - struct nv20_miptree *mt; - unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | - NOUVEAU_BUFFER_USAGE_TEXTURE; - - mt = MALLOC(sizeof(struct nv20_miptree)); - if (!mt) - return NULL; - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = screen; - - /* Swizzled textures must be POT */ - if (pt->width0 & (pt->width0 - 1) || - pt->height0 & (pt->height0 - 1)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else { - switch (pt->format) { - /* TODO: Figure out which formats can be swizzled */ - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_R16_SNORM: - { - if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - break; - } - default: - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - } - } - - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; - - 
/* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. - * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. - * This also happens for small mipmaps of large textures. */ - if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - nv20_miptree_layout(mt); - - mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size); - if (!mt->buffer) { - FREE(mt); - return NULL; - } - mt->bo = nouveau_bo(mt->buffer); - - return &mt->base; -} - -static void -nv20_miptree_destroy(struct pipe_texture *pt) -{ - struct nv20_miptree *nv20mt = (struct nv20_miptree *)pt; - int l; - - pipe_buffer_reference(&nv20mt->buffer, NULL); - for (l = 0; l <= pt->last_level; l++) { - if (nv20mt->level[l].image_offset) - FREE(nv20mt->level[l].image_offset); - } -} - -static struct pipe_surface * -nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) -{ - struct nv20_miptree *nv20mt = (struct nv20_miptree *)pt; - struct nv04_surface *ns; - - ns = CALLOC_STRUCT(nv04_surface); - if (!ns) - return NULL; - pipe_texture_reference(&ns->base.texture, pt); - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = flags; - pipe_reference_init(&ns->base.reference, 1); - ns->base.face = face; - ns->base.level = level; - ns->base.zslice = zslice; - ns->pitch = nv20mt->level[level].pitch; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ns->base.offset = nv20mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ns->base.offset = nv20mt->level[level].image_offset[zslice]; - } else { - ns->base.offset = nv20mt->level[level].image_offset[0]; - } - - /* create a linear temporary that we 
can render into if necessary. - * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so - * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ - if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) - return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base; - - return &ns->base; -} - -static void -nv20_miptree_surface_destroy(struct pipe_surface *ps) -{ - struct nv04_surface* ns = (struct nv04_surface*)ps; - if(ns->backing) - { - struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen; - if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) - screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); - nv20_miptree_surface_destroy(&ns->backing->base); - } - - pipe_texture_reference(&ps->texture, NULL); - FREE(ps); -} - -void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen) -{ - pscreen->texture_create = nv20_miptree_create; - pscreen->texture_blanket = nv20_miptree_blanket; - pscreen->texture_destroy = nv20_miptree_destroy; - pscreen->get_tex_surface = nv20_miptree_surface_get; - pscreen->tex_surface_destroy = nv20_miptree_surface_destroy; -} - diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c deleted file mode 100644 index 2e145672da1..00000000000 --- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c +++ /dev/null @@ -1,440 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * Build post-transformation, post-clipping vertex buffers and element - * lists by hooking into the end of the primitive pipeline and - * manipulating the vertex_id field in the vertex headers. - * - * XXX: work in progress - * - * \author José Fonseca <[email protected]> - * \author Keith Whitwell <[email protected]> - */ - - -#include "util/u_debug.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" - -#include "nv20_context.h" -#include "nv20_state.h" - -#include "draw/draw_vbuf.h" - -/** - * Primitive renderer for nv20. 
- */ -struct nv20_vbuf_render { - struct vbuf_render base; - - struct nv20_context *nv20; - - /** Vertex buffer in VRAM */ - struct pipe_buffer *pbuffer; - - /** Vertex buffer in normal memory */ - void *mbuffer; - - /** Vertex size in bytes */ - /*unsigned vertex_size;*/ - - /** Hardware primitive */ - unsigned hwprim; -}; - -/** - * Basically a cast wrapper. - */ -static INLINE struct nv20_vbuf_render * -nv20_vbuf_render(struct vbuf_render *render) -{ - assert(render); - return (struct nv20_vbuf_render *)render; -} - -void nv20_vtxbuf_bind( struct nv20_context* nv20 ) -{ -#if 0 - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - int i; - for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) { - BEGIN_RING(chan, kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(chan, 0/*nv20->vtxbuf*/); - BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i) ,1); - OUT_RING(chan, 0/*XXX*/); - } -#endif -} - -static const struct vertex_info * -nv20_vbuf_render_get_vertex_info( struct vbuf_render *render ) -{ - struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); - struct nv20_context *nv20 = nv20_render->nv20; - - nv20_emit_hw_state(nv20); - - return &nv20->vertex_info; -} - -static void * -nv20__allocate_mbuffer(struct nv20_vbuf_render *nv20_render, size_t size) -{ - nv20_render->mbuffer = MALLOC(size); - return nv20_render->mbuffer; -} - -static void -nv20__allocate_pbuffer(struct nv20_vbuf_render *nv20_render, size_t size) -{ - struct pipe_screen *screen = nv20_render->nv20->pipe.screen; - nv20_render->pbuffer = screen->buffer_create(screen, 64, - PIPE_BUFFER_USAGE_VERTEX, size); -} - -static boolean -nv20_vbuf_render_allocate_vertices( struct vbuf_render *render, - ushort vertex_size, - ushort nr_vertices ) -{ - struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); - size_t size = (size_t)vertex_size * (size_t)nr_vertices; - void *buf; - - assert(!nv20_render->pbuffer); - 
assert(!nv20_render->mbuffer); - - /* - * For small amount of vertices, don't bother with pipe vertex - * buffer, the data will be passed directly via the fifo. - */ - /* XXX: Pipe vertex buffers don't work. */ - if (0 && size > 16 * 1024) { - nv20__allocate_pbuffer(nv20_render, size); - /* umm yeah so this is ugly */ - buf = nv20_render->pbuffer; - } else { - buf = nv20__allocate_mbuffer(nv20_render, size); - } - - if (buf) - nv20_render->nv20->dirty |= NV20_NEW_VTXARRAYS; - - return buf ? TRUE : FALSE; -} - -static void * -nv20_vbuf_render_map_vertices( struct vbuf_render *render ) -{ - struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); - struct pipe_screen *pscreen = nv20_render->nv20->pipe.screen; - - if (nv20_render->pbuffer) { - return pipe_buffer_map(pscreen, nv20_render->pbuffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - } else if (nv20_render->mbuffer) { - return nv20_render->mbuffer; - } else - assert(0); - - /* warnings be gone */ - return NULL; -} - -static void -nv20_vbuf_render_unmap_vertices( struct vbuf_render *render, - ushort min_index, - ushort max_index ) -{ - struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); - struct pipe_screen *pscreen = nv20_render->nv20->pipe.screen; - - if (nv20_render->pbuffer) - pipe_buffer_unmap(pscreen, nv20_render->pbuffer); -} - -static boolean -nv20_vbuf_render_set_primitive( struct vbuf_render *render, - unsigned prim ) -{ - struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); - unsigned hwp = nvgl_primitive(prim); - if (hwp == 0) - return FALSE; - - nv20_render->hwprim = hwp; - return TRUE; -} - -static uint32_t -nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type) -{ - return (stride << NV20TCL_VTXFMT_STRIDE_SHIFT) | - (fields << NV20TCL_VTXFMT_SIZE_SHIFT) | - (type << NV20TCL_VTXFMT_TYPE_SHIFT); -} - -static unsigned -nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) -{ - struct nv20_screen *screen = nv20->screen; - struct 
nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - uint32_t hwfmt = 0; - unsigned fields; - - switch (type) { - case EMIT_OMIT: - hwfmt = nv20__vtxhwformat(0, 0, 2); - fields = 0; - break; - case EMIT_1F: - hwfmt = nv20__vtxhwformat(4, 1, 2); - fields = 1; - break; - case EMIT_2F: - hwfmt = nv20__vtxhwformat(8, 2, 2); - fields = 2; - break; - case EMIT_3F: - hwfmt = nv20__vtxhwformat(12, 3, 2); - fields = 3; - break; - case EMIT_4F: - hwfmt = nv20__vtxhwformat(16, 4, 2); - fields = 4; - break; - default: - NOUVEAU_ERR("unhandled attrib_emit %d\n", type); - return 0; - } - - BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(hwattr), 1); - OUT_RING(chan, hwfmt); - return fields; -} - -static unsigned -nv20__emit_vertex_array_format(struct nv20_context *nv20) -{ - struct vertex_info *vinfo = &nv20->vertex_info; - int hwattr = NV20TCL_VTXFMT__SIZE; - int attr = 0; - unsigned nr_fields = 0; - - while (hwattr-- > 0) { - if (vinfo->hwfmt[0] & (1 << hwattr)) { - nr_fields += nv20__emit_format(nv20, - vinfo->attrib[attr].emit, hwattr); - attr++; - } else - nv20__emit_format(nv20, EMIT_OMIT, hwattr); - } - - return nr_fields; -} - -static void -nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, - const ushort *indices, - uint nr_indices) -{ - struct nv20_context *nv20 = nv20_render->nv20; - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - struct vertex_info *vinfo = &nv20->vertex_info; - unsigned nr_fields; - int max_push; - ubyte *data = nv20_render->mbuffer; - int vsz = 4 * vinfo->size; - - nr_fields = nv20__emit_vertex_array_format(nv20); - - BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(chan, nv20_render->hwprim); - - max_push = 1200 / nr_fields; - while (nr_indices) { - int i; - int push = MIN2(nr_indices, max_push); - - BEGIN_RING_NI(chan, kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); - for (i = 0; i < push; 
i++) { - /* XXX: fixme to handle other than floats? */ - int f = nr_fields; - float *attrv = (float*)&data[indices[i] * vsz]; - while (f-- > 0) - OUT_RINGf(chan, *attrv++); - } - - nr_indices -= push; - indices += push; - } - - BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(chan, NV20TCL_VERTEX_BEGIN_END_STOP); -} - -static void -nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, - const ushort *indices, - uint nr_indices) -{ - struct nv20_context *nv20 = nv20_render->nv20; - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - int push, i; - - NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n"); - - BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(chan, nouveau_bo(nv20_render->pbuffer), 0, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - - BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(chan, nv20_render->hwprim); - - if (nr_indices & 1) { - BEGIN_RING(chan, kelvin, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, indices[0]); - indices++; nr_indices--; - } - - while (nr_indices) { - // XXX too big/small ? 
check the size - push = MIN2(nr_indices, 1200 * 2); - - BEGIN_RING_NI(chan, kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (indices[i+1] << 16) | indices[i]); - - nr_indices -= push; - indices += push; - } - - BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (chan, 0); -} - -static void -nv20_vbuf_render_draw( struct vbuf_render *render, - const ushort *indices, - uint nr_indices) -{ - struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); - - nv20_emit_hw_state(nv20_render->nv20); - - if (nv20_render->pbuffer) - nv20__draw_pbuffer(nv20_render, indices, nr_indices); - else if (nv20_render->mbuffer) - nv20__draw_mbuffer(nv20_render, indices, nr_indices); - else - assert(0); -} - - -static void -nv20_vbuf_render_release_vertices( struct vbuf_render *render ) -{ - struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); - struct nv20_context *nv20 = nv20_render->nv20; - - if (nv20_render->pbuffer) { - pipe_buffer_reference(&nv20_render->pbuffer, NULL); - } else if (nv20_render->mbuffer) { - FREE(nv20_render->mbuffer); - nv20_render->mbuffer = NULL; - } else - assert(0); -} - - -static void -nv20_vbuf_render_destroy( struct vbuf_render *render ) -{ - struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); - - assert(!nv20_render->pbuffer); - assert(!nv20_render->mbuffer); - - FREE(nv20_render); -} - - -/** - * Create a new primitive render. 
- */ -static struct vbuf_render * -nv20_vbuf_render_create( struct nv20_context *nv20 ) -{ - struct nv20_vbuf_render *nv20_render = CALLOC_STRUCT(nv20_vbuf_render); - - nv20_render->nv20 = nv20; - - nv20_render->base.max_vertex_buffer_bytes = 16*1024; - nv20_render->base.max_indices = 1024; - nv20_render->base.get_vertex_info = nv20_vbuf_render_get_vertex_info; - nv20_render->base.allocate_vertices = - nv20_vbuf_render_allocate_vertices; - nv20_render->base.map_vertices = nv20_vbuf_render_map_vertices; - nv20_render->base.unmap_vertices = nv20_vbuf_render_unmap_vertices; - nv20_render->base.set_primitive = nv20_vbuf_render_set_primitive; - nv20_render->base.draw = nv20_vbuf_render_draw; - nv20_render->base.release_vertices = nv20_vbuf_render_release_vertices; - nv20_render->base.destroy = nv20_vbuf_render_destroy; - - return &nv20_render->base; -} - - -/** - * Create a new primitive vbuf/render stage. - */ -struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 ) -{ - struct vbuf_render *render; - struct draw_stage *stage; - - render = nv20_vbuf_render_create(nv20); - if(!render) - return NULL; - - stage = draw_vbuf_stage( nv20->draw, render ); - if(!stage) { - render->destroy(render); - return NULL; - } - - return stage; -} diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c deleted file mode 100644 index d091335063b..00000000000 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ /dev/null @@ -1,194 +0,0 @@ -#include "pipe/p_screen.h" - -#include "nv20_context.h" -#include "nv20_screen.h" - -static int -nv20_screen_get_param(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 2; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 0; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case 
PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 0; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 0; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 0; - case NOUVEAU_CAP_HW_VTXBUF: - case NOUVEAU_CAP_HW_IDXBUF: - return 0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv20_screen_get_paramf(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 2.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static boolean -nv20_screen_is_format_supported(struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, unsigned geom_flags) -{ - if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - return TRUE; - default: - break; - } - } else - if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - } else { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return TRUE; - default: - break; - } - } - - return FALSE; -} - -static void -nv20_screen_destroy(struct pipe_screen *pscreen) -{ - struct nv20_screen 
*screen = nv20_screen(pscreen); - - nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->kelvin); - nv04_surface_2d_takedown(&screen->eng2d); - - nouveau_screen_fini(&screen->base); - - FREE(pscreen); -} - -static struct pipe_buffer * -nv20_surface_buffer(struct pipe_surface *surf) -{ - struct nv20_miptree *mt = (struct nv20_miptree *)surf->texture; - - return mt->buffer; -} - -struct pipe_screen * -nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) -{ - struct nv20_screen *screen = CALLOC_STRUCT(nv20_screen); - struct nouveau_channel *chan; - struct pipe_screen *pscreen; - unsigned kelvin_class = 0; - int ret; - - if (!screen) - return NULL; - pscreen = &screen->base.base; - - ret = nouveau_screen_init(&screen->base, dev); - if (ret) { - nv20_screen_destroy(pscreen); - return NULL; - } - chan = screen->base.channel; - - pscreen->winsys = ws; - pscreen->destroy = nv20_screen_destroy; - pscreen->get_param = nv20_screen_get_param; - pscreen->get_paramf = nv20_screen_get_paramf; - pscreen->is_format_supported = nv20_screen_is_format_supported; - - nv20_screen_init_miptree_functions(pscreen); - nv20_screen_init_transfer_functions(pscreen); - - /* 3D object */ - if (dev->chipset >= 0x25) - kelvin_class = NV25TCL; - else if (dev->chipset >= 0x20) - kelvin_class = NV20TCL; - - if (!kelvin_class || dev->chipset >= 0x30) { - NOUVEAU_ERR("Unknown nv2x chipset: nv%02x\n", dev->chipset); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, 0xbeef0097, kelvin_class, - &screen->kelvin); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* 2D engine setup */ - screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nv20_surface_buffer; - - /* Notifier for sync purposes */ - ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv20_screen_destroy(pscreen); - return NULL; - } - - return pscreen; -} 
- diff --git a/src/gallium/drivers/nv20/nv20_screen.h b/src/gallium/drivers/nv20/nv20_screen.h deleted file mode 100644 index fc7bb050334..00000000000 --- a/src/gallium/drivers/nv20/nv20_screen.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __NV20_SCREEN_H__ -#define __NV20_SCREEN_H__ - -#include "nouveau/nouveau_screen.h" -#include "nv04/nv04_surface_2d.h" - -struct nv20_screen { - struct nouveau_screen base; - - struct nouveau_winsys *nvws; - - /* HW graphics objects */ - struct nv04_surface_2d *eng2d; - struct nouveau_grobj *kelvin; - struct nouveau_notifier *sync; -}; - -static INLINE struct nv20_screen * -nv20_screen(struct pipe_screen *screen) -{ - return (struct nv20_screen *)screen; -} - - -void -nv20_screen_init_transfer_functions(struct pipe_screen *pscreen); - -#endif diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h deleted file mode 100644 index dde41065685..00000000000 --- a/src/gallium/drivers/nv20/nv20_state.h +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef __NV20_STATE_H__ -#define __NV20_STATE_H__ - -#include "pipe/p_state.h" -#include "tgsi/tgsi_scan.h" - -struct nv20_blend_state { - uint32_t b_enable; - uint32_t b_srcfunc; - uint32_t b_dstfunc; - - uint32_t c_mask; - - uint32_t d_enable; -}; - -struct nv20_sampler_state { - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - -struct nv20_rasterizer_state { - uint32_t shade_model; - - uint32_t line_width; - uint32_t line_smooth_en; - - uint32_t point_size; - - uint32_t poly_smooth_en; - - uint32_t poly_mode_front; - uint32_t poly_mode_back; - - uint32_t front_face; - uint32_t cull_face; - uint32_t cull_face_en; - - uint32_t point_sprite; - - const struct pipe_rasterizer_state *templ; -}; - -struct nv20_vertex_program_exec { - uint32_t data[4]; - boolean has_branch_offset; - int const_index; -}; - -struct nv20_vertex_program_data { - int index; /* immediates == -1 */ - float value[4]; -}; - -struct nv20_vertex_program { - const struct 
pipe_shader_state *pipe; - - boolean translated; - struct nv20_vertex_program_exec *insns; - unsigned nr_insns; - struct nv20_vertex_program_data *consts; - unsigned nr_consts; - - struct nouveau_resource *exec; - unsigned exec_start; - struct nouveau_resource *data; - unsigned data_start; - unsigned data_start_min; - - uint32_t ir; - uint32_t or; -}; - -struct nv20_fragment_program_data { - unsigned offset; - unsigned index; -}; - -struct nv20_fragment_program { - struct pipe_shader_state pipe; - struct tgsi_shader_info info; - - boolean translated; - boolean on_hw; - unsigned samplers; - - uint32_t *insn; - int insn_len; - - struct nv20_fragment_program_data *consts; - unsigned nr_consts; - - struct pipe_buffer *buffer; - - uint32_t fp_control; - uint32_t fp_reg_control; -}; - - -struct nv20_depth_stencil_alpha_state { - struct { - uint32_t func; - uint32_t write_enable; - uint32_t test_enable; - } depth; - - struct { - uint32_t enable; - uint32_t wmask; - uint32_t func; - uint32_t ref; - uint32_t vmask; - uint32_t fail; - uint32_t zfail; - uint32_t zpass; - } stencil; - - struct { - uint32_t enabled; - uint32_t func; - uint32_t ref; - } alpha; -}; - -struct nv20_miptree { - struct pipe_texture base; - struct nouveau_bo *bo; - - struct pipe_buffer *buffer; - uint total_size; - - struct { - uint pitch; - uint *image_offset; - } level[PIPE_MAX_TEXTURE_LEVELS]; -}; - -#endif diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c deleted file mode 100644 index 6bbd1fdae9d..00000000000 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ /dev/null @@ -1,426 +0,0 @@ -#include "nv20_context.h" -#include "nv20_state.h" -#include "draw/draw_context.h" - -static void nv20_state_emit_blend(struct nv20_context* nv20) -{ - struct nv20_blend_state *b = nv20->blend; - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - - BEGIN_RING(chan, 
kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (chan, b->d_enable); - - BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (chan, b->b_enable); - - BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 2); - OUT_RING (chan, b->b_srcfunc); - OUT_RING (chan, b->b_dstfunc); - - BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (chan, b->c_mask); -} - -static void nv20_state_emit_blend_color(struct nv20_context* nv20) -{ - struct pipe_blend_color *c = nv20->blend_color; - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - - BEGIN_RING(chan, kelvin, NV20TCL_BLEND_COLOR, 1); - OUT_RING (chan, - (float_to_ubyte(c->color[3]) << 24)| - (float_to_ubyte(c->color[0]) << 16)| - (float_to_ubyte(c->color[1]) << 8) | - (float_to_ubyte(c->color[2]) << 0)); -} - -static void nv20_state_emit_rast(struct nv20_context* nv20) -{ - struct nv20_rasterizer_state *r = nv20->rast; - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - - BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 2); - OUT_RING (chan, r->shade_model); - OUT_RING (chan, r->line_width); - - - BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); - OUT_RING (chan, r->point_size); - - BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (chan, r->poly_mode_front); - OUT_RING (chan, r->poly_mode_back); - - - BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (chan, r->cull_face); - OUT_RING (chan, r->front_face); - - BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (chan, r->line_smooth_en); - OUT_RING (chan, r->poly_smooth_en); - - BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (chan, r->cull_face_en); -} - -static void nv20_state_emit_dsa(struct nv20_context* nv20) -{ - struct nv20_depth_stencil_alpha_state *d = nv20->dsa; - struct nv20_screen *screen = 
nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - - BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (chan, d->depth.func); - - BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (chan, d->depth.write_enable); - - BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (chan, d->depth.test_enable); - - BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (chan, 1); - -#if 0 - BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (chan, d->stencil.enable); - BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); -#endif - - BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (chan, d->alpha.enabled); - - BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (chan, d->alpha.func); - - BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_REF, 1); - OUT_RING (chan, d->alpha.ref); -} - -static void nv20_state_emit_viewport(struct nv20_context* nv20) -{ -} - -static void nv20_state_emit_scissor(struct nv20_context* nv20) -{ - /* NV20TCL_SCISSOR_* is probably a software method */ -/* struct pipe_scissor_state *s = nv20->scissor; - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - - BEGIN_RING(chan, kelvin, NV20TCL_SCISSOR_HORIZ, 2); - OUT_RING (chan, ((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (chan, ((s->maxy - s->miny) << 16) | s->miny);*/ -} - -static void nv20_state_emit_framebuffer(struct nv20_context* nv20) -{ - struct pipe_framebuffer_state* fb = nv20->framebuffer; - struct nv04_surface *rt, *zeta = NULL; - uint32_t rt_format, w, h; - int colour_format = 0, zeta_format = 0; - struct nv20_miptree *nv20mt = 0; - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - - w = 
fb->cbufs[0]->width; - h = fb->cbufs[0]->height; - colour_format = fb->cbufs[0]->format; - rt = (struct nv04_surface *)fb->cbufs[0]; - - if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - } - - zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; - } - - rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20; - - switch (colour_format) { - case PIPE_FORMAT_X8R8G8B8_UNORM: - rt_format |= NV20TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV20TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - if (zeta) { - BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); - } else { - BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (chan, rt->pitch | (rt->pitch << 16)); - } - - nv20mt = (struct nv20_miptree *)rt->base.texture; - nv20->rt[0] = nv20mt->buffer; - - if (zeta_format) - { - nv20mt = (struct nv20_miptree *)zeta->base.texture; - nv20->zeta = nv20mt->buffer; - } - - BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 3); - OUT_RING (chan, (w << 16) | 0); - OUT_RING (chan, (h << 16) | 0); /*NV20TCL_RT_VERT */ - OUT_RING (chan, rt_format); /* NV20TCL_RT_FORMAT */ - BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (chan, ((w - 1) << 16) | 0); - OUT_RING (chan, ((h - 1) << 16) | 0); -} - -static void nv20_vertex_layout(struct nv20_context *nv20) -{ - struct nv20_fragment_program *fp = nv20->fragprog.current; - struct draw_context *dc = nv20->draw; - int src; - int i; - struct vertex_info *vinfo = &nv20->vertex_info; - const enum interp_mode colorInterp = INTERP_LINEAR; - boolean colors[2] = { FALSE }; - boolean generics[12] = { FALSE }; - boolean fog = FALSE; - - memset(vinfo, 0, sizeof(*vinfo)); - - /* - * Assumed 
NV20 hardware vertex attribute order: - * 0 position, 1 ?, 2 ?, 3 col0, - * 4 col1?, 5 ?, 6 ?, 7 ?, - * 8 ?, 9 tex0, 10 tex1, 11 tex2, - * 12 tex3, 13 ?, 14 ?, 15 ? - * unaccounted: wgh, nor, fog - * There are total 16 attrs. - * vinfo->hwfmt[0] has a used-bit corresponding to each of these. - * relation to TGSI_SEMANTIC_*: - * - POSITION: position (always used) - * - COLOR: col1, col0 - * - GENERIC: tex3, tex2, tex1, tex0, normal, weight - * - FOG: fog - */ - - for (i = 0; i < fp->info.num_inputs; i++) { - int isn = fp->info.input_semantic_name[i]; - int isi = fp->info.input_semantic_index[i]; - switch (isn) { - case TGSI_SEMANTIC_POSITION: - break; - case TGSI_SEMANTIC_COLOR: - assert(isi < 2); - colors[isi] = TRUE; - break; - case TGSI_SEMANTIC_GENERIC: - assert(isi < 12); - generics[isi] = TRUE; - break; - case TGSI_SEMANTIC_FOG: - fog = TRUE; - break; - default: - assert(0 && "unknown input_semantic_name"); - } - } - - /* always do position */ { - src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); - vinfo->hwfmt[0] |= (1 << 0); - } - - /* two unnamed generics */ - for (i = 4; i < 6; i++) { - if (!generics[i]) - continue; - src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); - vinfo->hwfmt[0] |= (1 << (i - 3)); - } - - if (colors[0]) { - src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); - vinfo->hwfmt[0] |= (1 << 3); - } - - if (colors[1]) { - src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); - vinfo->hwfmt[0] |= (1 << 4); - } - - /* four unnamed generics */ - for (i = 6; i < 10; i++) { - if (!generics[i]) - continue; - src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); - vinfo->hwfmt[0] |= (1 << (i - 1)); - 
} - - /* tex0, tex1, tex2, tex3 */ - for (i = 0; i < 4; i++) { - if (!generics[i]) - continue; - src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); - vinfo->hwfmt[0] |= (1 << (i + 9)); - } - - /* two unnamed generics */ - for (i = 10; i < 12; i++) { - if (!generics[i]) - continue; - src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); - vinfo->hwfmt[0] |= (1 << (i + 3)); - } - - if (fog) { - src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); - vinfo->hwfmt[0] |= (1 << 15); - } - - draw_compute_vertex_size(vinfo); -} - -void -nv20_emit_hw_state(struct nv20_context *nv20) -{ - struct nv20_screen *screen = nv20->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *kelvin = screen->kelvin; - struct nouveau_bo *rt_bo; - int i; - - if (nv20->dirty & NV20_NEW_VERTPROG) { - //nv20_vertprog_bind(nv20, nv20->vertprog.current); - nv20->dirty &= ~NV20_NEW_VERTPROG; - } - - if (nv20->dirty & NV20_NEW_FRAGPROG) { - nv20_fragprog_bind(nv20, nv20->fragprog.current); - /*XXX: clear NV20_NEW_FRAGPROG if no new program uploaded */ - nv20->dirty_samplers |= (1<<10); - nv20->dirty_samplers = 0; - } - - if (nv20->dirty_samplers || (nv20->dirty & NV20_NEW_FRAGPROG)) { - nv20_fragtex_bind(nv20); - nv20->dirty &= ~NV20_NEW_FRAGPROG; - } - - if (nv20->dirty & NV20_NEW_VTXARRAYS) { - nv20->dirty &= ~NV20_NEW_VTXARRAYS; - nv20_vertex_layout(nv20); - nv20_vtxbuf_bind(nv20); - } - - if (nv20->dirty & NV20_NEW_BLEND) { - nv20->dirty &= ~NV20_NEW_BLEND; - nv20_state_emit_blend(nv20); - } - - if (nv20->dirty & NV20_NEW_BLENDCOL) { - nv20->dirty &= ~NV20_NEW_BLENDCOL; - nv20_state_emit_blend_color(nv20); - } - - if (nv20->dirty & NV20_NEW_RAST) { - nv20->dirty &= ~NV20_NEW_RAST; - nv20_state_emit_rast(nv20); - } - - if (nv20->dirty & NV20_NEW_DSA) 
{ - nv20->dirty &= ~NV20_NEW_DSA; - nv20_state_emit_dsa(nv20); - } - - if (nv20->dirty & NV20_NEW_VIEWPORT) { - nv20->dirty &= ~NV20_NEW_VIEWPORT; - nv20_state_emit_viewport(nv20); - } - - if (nv20->dirty & NV20_NEW_SCISSOR) { - nv20->dirty &= ~NV20_NEW_SCISSOR; - nv20_state_emit_scissor(nv20); - } - - if (nv20->dirty & NV20_NEW_FRAMEBUFFER) { - nv20->dirty &= ~NV20_NEW_FRAMEBUFFER; - nv20_state_emit_framebuffer(nv20); - } - - /* Emit relocs for every referenced buffer. - * This is to ensure the bufmgr has an accurate idea of how - * the buffer is used. This isn't very efficient, but we don't - * seem to take a significant performance hit. Will be improved - * at some point. Vertex arrays are emitted by nv20_vbo.c - */ - - /* Render target */ - rt_bo = nouveau_bo(nv20->rt[0]); - BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 1); - OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - if (nv20->zeta) { - struct nouveau_bo *zeta_bo = nouveau_bo(nv20->zeta); - BEGIN_RING(chan, kelvin, NV20TCL_DMA_ZETA, 1); - OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, kelvin, NV20TCL_ZETA_OFFSET, 1); - OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(chan, kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(chan, nouveau_bo(nv20->zeta + lma_offset));*/ - } - - /* Vertex buffer */ - BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - /* Texture images */ - for (i = 0; i < 2; i++) { - if (!(nv20->fp_samplers & (1 << i))) - continue; - struct nouveau_bo *bo = nouveau_bo(nv20->tex[i].buffer); - BEGIN_RING(chan, kelvin, NV20TCL_TX_OFFSET(i), 1); - OUT_RELOCl(chan, bo, 0, 
NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(chan, kelvin, NV20TCL_TX_FORMAT(i), 1); - OUT_RELOCd(chan, bo, nv20->tex[i].format, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | - NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0, - NV20TCL_TX_FORMAT_DMA1); - } -} - diff --git a/src/gallium/drivers/nv20/nv20_surface.c b/src/gallium/drivers/nv20/nv20_surface.c deleted file mode 100644 index 4224bdd6afa..00000000000 --- a/src/gallium/drivers/nv20/nv20_surface.c +++ /dev/null @@ -1,63 +0,0 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "nv20_context.h" -#include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" -#include "util/u_tile.h" - -static void -nv20_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, - struct pipe_surface *src, unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - struct nv20_context *nv20 = nv20_context(pipe); - struct nv04_surface_2d *eng2d = nv20->screen->eng2d; - - eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); -} - -static void -nv20_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nv20_context *nv20 = nv20_context(pipe); - struct nv04_surface_2d *eng2d = nv20->screen->eng2d; - - eng2d->fill(eng2d, dest, destx, desty, width, height, value); -} - -void -nv20_init_surface_functions(struct nv20_context *nv20) -{ - nv20->pipe.surface_copy = nv20_surface_copy; - nv20->pipe.surface_fill = nv20_surface_fill; -} diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c deleted file mode 100644 index 699773e8e6f..00000000000 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ /dev/null @@ -1,178 +0,0 @@ -#include <pipe/p_state.h> -#include <pipe/p_defines.h> -#include <pipe/p_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_math.h> -#include <nouveau/nouveau_winsys.h> -#include "nv20_context.h" -#include "nv20_screen.h" -#include "nv20_state.h" - -struct nv20_transfer { - struct pipe_transfer base; - struct pipe_surface *surface; - boolean direct; -}; - -static void -nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, - struct pipe_texture *template) -{ - memset(template, 0, sizeof(struct pipe_texture)); - template->target = 
pt->target; - template->format = pt->format; - template->width0 = width; - template->height0 = height; - template->depth0 = 1; - template->last_level = 0; - template->nr_samples = pt->nr_samples; - - template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | - NOUVEAU_TEXTURE_USAGE_LINEAR; -} - -static struct pipe_transfer * -nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct nv20_miptree *mt = (struct nv20_miptree *)pt; - struct nv20_transfer *tx; - struct pipe_texture tx_tex_template, *tx_tex; - - tx = CALLOC_STRUCT(nv20_transfer); - if (!tx) - return NULL; - - pipe_texture_reference(&tx->base.texture, pt); - tx->base.x = x; - tx->base.y = y; - tx->base.width = w; - tx->base.height = h; - tx->base.stride = mt->level[level].pitch; - tx->base.usage = usage; - tx->base.face = face; - tx->base.level = level; - tx->base.zslice = zslice; - - /* Direct access to texture */ - if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || - debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) && - pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) - { - tx->direct = true; - tx->surface = pscreen->get_tex_surface(pscreen, pt, - 0, 0, 0, - pipe_transfer_buffer_flags(&tx->base)); - return &tx->base; - } - - tx->direct = false; - - nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template); - - tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); - if (!tx_tex) - { - FREE(tx); - return NULL; - } - - tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch; - - tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, - face, level, zslice, - pipe_transfer_buffer_flags(&tx->base)); - - pipe_texture_reference(&tx_tex, NULL); - - if (!tx->surface) - { - pipe_surface_reference(&tx->surface, NULL); - FREE(tx); - return NULL; - } - - if (usage & PIPE_TRANSFER_READ) { - struct nv20_screen *nvscreen = 
nv20_screen(pscreen); - struct pipe_surface *src; - - src = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - PIPE_BUFFER_USAGE_GPU_READ); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - /* TODO: Check if SIFM can un-swizzle */ - nvscreen->eng2d->copy(nvscreen->eng2d, - tx->surface, 0, 0, - src, x, y, - w, h); - - pipe_surface_reference(&src, NULL); - } - - return &tx->base; -} - -static void -nv20_transfer_del(struct pipe_transfer *ptx) -{ - struct nv20_transfer *tx = (struct nv20_transfer *)ptx; - - if (!tx->direct && (ptx->usage = PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = ptx->texture->screen; - struct nv20_screen *nvscreen = nv20_screen(pscreen); - struct pipe_surface *dst; - - dst = pscreen->get_tex_surface(pscreen, ptx->texture, - ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - nvscreen->eng2d->copy(nvscreen->eng2d, - dst, tx->base.x, tx->base.y, - tx->surface, 0, 0, - tx->base.width, tx->base.height); - - pipe_surface_reference(&dst, NULL); - } - - pipe_surface_reference(&tx->surface, NULL); - pipe_texture_reference(&ptx->texture, NULL); - FREE(ptx); -} - -static void * -nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv20_transfer *tx = (struct nv20_transfer *)ptx; - struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nv20_miptree *mt = (struct nv20_miptree *)tx->surface->texture; - void *map = pipe_buffer_map(pscreen, mt->buffer, - pipe_transfer_buffer_flags(ptx)); - - if(!tx->direct) - return map + ns->base.offset; - else - return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); -} - -static void -nv20_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv20_transfer *tx = (struct nv20_transfer *)ptx; - struct nv20_miptree *mt = (struct nv20_miptree 
*)tx->surface->texture; - - pipe_buffer_unmap(pscreen, mt->buffer); -} - -void -nv20_screen_init_transfer_functions(struct pipe_screen *pscreen) -{ - pscreen->get_tex_transfer = nv20_transfer_new; - pscreen->tex_transfer_destroy = nv20_transfer_del; - pscreen->transfer_map = nv20_transfer_map; - pscreen->transfer_unmap = nv20_transfer_unmap; -} diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c deleted file mode 100644 index 52991a0d856..00000000000 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ /dev/null @@ -1,79 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "pipe/p_inlines.h" - -#include "nv20_context.h" -#include "nv20_state.h" - -#include "nouveau/nouveau_channel.h" -#include "nouveau/nouveau_pushbuf.h" - -void nv20_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) -{ - struct pipe_screen *pscreen = pipe->screen; - struct nv20_context *nv20 = nv20_context( pipe ); - struct draw_context *draw = nv20->draw; - unsigned i; - - nv20_emit_hw_state(nv20); - - /* - * Map vertex buffers - */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (nv20->vtxbuf[i].buffer) { - void *buf - = pipe_buffer_map(pscreen, - nv20->vtxbuf[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(draw, i, buf); - } - } - /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pscreen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, NULL); - } - - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, - nv20->constbuf[PIPE_SHADER_VERTEX], - nv20->constbuf_nr[PIPE_SHADER_VERTEX]); - - /* draw! 
*/ - draw_arrays(nv20->draw, prim, start, count); - - /* - * unmap vertex/index buffers - */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (nv20->vtxbuf[i].buffer) { - pipe_buffer_unmap(pscreen, nv20->vtxbuf[i].buffer); - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - } - if (indexBuffer) { - pipe_buffer_unmap(pscreen, indexBuffer); - draw_set_mapped_element_buffer(draw, 0, NULL); - } - - draw_flush(nv20->draw); -} - -void nv20_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) -{ - nv20_draw_elements(pipe, NULL, 0, prim, start, count); -} - - - diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c deleted file mode 100644 index 7886c2af7e6..00000000000 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ /dev/null @@ -1,841 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_dump.h" - -#include "nv20_context.h" -#include "nv20_state.h" - -/* TODO (at least...): - * 1. Indexed consts + ARL - * 2. Arb. swz/negation - * 3. NV_vp11, NV_vp2, NV_vp3 features - * - extra arith opcodes - * - branching - * - texture sampling - * - indexed attribs - * - indexed results - * 4. 
bugs - */ - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv20_shader.h" - -#define swz(s,x,y,z,w) nv20_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv20_sr_neg((s)) -#define abs(s) nv20_sr_abs((s)) - -struct nv20_vpc { - struct nv20_vertex_program *vp; - - struct nv20_vertex_program_exec *vpi; - - unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; - - int high_temp; - int temp_temp_count; - - struct nv20_sreg *imm; - unsigned nr_imm; -}; - -static struct nv20_sreg -temp(struct nv20_vpc *vpc) -{ - int idx; - - idx = vpc->temp_temp_count++; - idx += vpc->high_temp + 1; - return nv20_sr(NV30SR_TEMP, idx); -} - -static struct nv20_sreg -constant(struct nv20_vpc *vpc, int pipe, float x, float y, float z, float w) -{ - struct nv20_vertex_program *vp = vpc->vp; - struct nv20_vertex_program_data *vpd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < vp->nr_consts; idx++) { - if (vp->consts[idx].index == pipe) - return nv20_sr(NV30SR_CONST, idx); - } - } - - idx = vp->nr_consts++; - vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); - vpd = &vp->consts[idx]; - - vpd->index = pipe; - vpd->value[0] = x; - vpd->value[1] = y; - vpd->value[2] = z; - vpd->value[3] = w; - return nv20_sr(NV30SR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv20_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) - -static void -emit_src(struct nv20_vpc *vpc, uint32_t *hw, int pos, struct nv20_sreg src) -{ - struct nv20_vertex_program *vp = vpc->vp; - uint32_t sr = 0; - - switch (src.type) { - case NV30SR_TEMP: - sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); - break; - case NV30SR_INPUT: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - vp->ir |= (1 << 
src.index); - hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); - break; - case NV30SR_CONST: - sr |= (NV30_VP_SRC_REG_TYPE_CONST << - NV30_VP_SRC_REG_TYPE_SHIFT); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; - break; - case NV30SR_NONE: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV30_VP_SRC_NEGATE; - - if (src.abs) - hw[0] |= (1 << (21 + pos)); - - sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); - -/* - * |VVV| - * d�.�b - * \u/ - * - */ - - switch (pos) { - case 0: - hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> - NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << - NV30_VP_INST_SRC0L_SHIFT; - break; - case 1: - hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; - break; - case 2: - hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> - NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << - NV30_VP_INST_SRC2L_SHIFT; - break; - default: - assert(0); - } -} - -static void -emit_dst(struct nv20_vpc *vpc, uint32_t *hw, int slot, struct nv20_sreg dst) -{ - struct nv20_vertex_program *vp = vpc->vp; - - switch (dst.type) { - case NV30SR_TEMP: - hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); - break; - case NV30SR_OUTPUT: - switch (dst.index) { - case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); 
break; - case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - default: - break; - } - - hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); - hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - - /*XXX: no way this is entirely correct, someone needs to - * figure out what exactly it is. - */ - hw[3] |= 0x800; - break; - default: - assert(0); - } -} - -static void -nv20_vp_arith(struct nv20_vpc *vpc, int slot, int op, - struct nv20_sreg dst, int mask, - struct nv20_sreg s0, struct nv20_sreg s1, - struct nv20_sreg s2) -{ - struct nv20_vertex_program *vp = vpc->vp; - uint32_t *hw; - - vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); - vpc->vpi = &vp->insns[vp->nr_insns - 1]; - memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; - - hw = vpc->vpi->data; - - hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); - - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); -// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; -// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); - - if (dst.type == NV30SR_OUTPUT) { - if (slot) - hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); - } else { - if (slot) - hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); - } - - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); -} - -static INLINE struct nv20_sreg -tgsi_src(struct nv20_vpc 
*vpc, const struct tgsi_full_src_register *fsrc) { - struct nv20_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nv20_sr(NV30SR_INPUT, fsrc->Register.Index); - break; - case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); - break; - case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->Register.Index) - vpc->high_temp = fsrc->Register.Index; - src = nv20_sr(NV30SR_TEMP, fsrc->Register.Index); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nv20_sreg -tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nv20_sreg dst; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - dst = nv20_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->Register.Index]); - - break; - case TGSI_FILE_TEMPORARY: - dst = nv20_sr(NV30SR_TEMP, fdst->Register.Index); - if (vpc->high_temp < dst.index) - vpc->high_temp = dst.index; - break; - default: - NOUVEAU_ERR("bad dst file\n"); - break; - } - - return dst; -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, - const struct tgsi_full_instruction *finst) -{ - struct nv20_sreg src[3], dst, tmp; - struct nv20_sreg none = nv20_sr(NV30SR_NONE, 0); - int mask; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - vpc->temp_temp_count = 0; - for (i = 0; i 
< finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(vpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - /*XXX: index comparison is broken now that consts come from - * two different register files. - */ - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); - break; - case TGSI_OPCODE_ARL: - arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DST: - arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); - 
break; - case TGSI_OPCODE_EXP: - arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_FLR: - arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_LG2: - arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LIT: - arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LOG: - arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_MAD: - arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, 1, OP_EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); - break; - case TGSI_OPCODE_RCP: - arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: - break; - case TGSI_OPCODE_RSQ: - arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_SGE: - arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGT: - arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); - break; - case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, 0, OP_MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), 
none); - arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NV30_VP_INST_DEST_POS; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_BFC0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_BFC1; - } else { - NOUVEAU_ERR("bad bcolour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV30_VP_INST_DEST_FOGC; - break; - case TGSI_SEMANTIC_PSIZE: - hw = NV30_VP_INST_DEST_PSZ; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_EDGEFLAG: - NOUVEAU_ERR("cannot handle edgeflag output\n"); - return FALSE; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - vpc->output_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean -nv20_vertprog_prepare(struct nv20_vpc *vpc) -{ - struct tgsi_parse_context p; - int nr_imm = 0; - - tgsi_parse_init(&p, vpc->vp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - nr_imm++; - break; - default: - break; - } - } - tgsi_parse_free(&p); - - if (nr_imm) { - 
vpc->imm = CALLOC(nr_imm, sizeof(struct nv20_sreg)); - assert(vpc->imm); - } - - return TRUE; -} - -static void -nv20_vertprog_translate(struct nv20_context *nv20, - struct nv20_vertex_program *vp) -{ - struct tgsi_parse_context parse; - struct nv20_vpc *vpc = NULL; - - tgsi_dump(vp->pipe.tokens,0); - - vpc = CALLOC(1, sizeof(struct nv20_vpc)); - if (!vpc) - return; - vpc->vp = vp; - vpc->high_temp = -1; - - if (!nv20_vertprog_prepare(vpc)) { - FREE(vpc); - return; - } - - tgsi_parse_init(&parse, vp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_OUTPUT: - if (!nv20_vertprog_parse_decl_output(vpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(imm->Immediate.NrTokens == 4 + 1); - vpc->imm[vpc->nr_imm++] = - constant(vpc, -1, - imm->u[0].Float, - imm->u[1].Float, - imm->u[2].Float, - imm->u[3].Float); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - finst = &parse.FullToken.FullInstruction; - if (!nv20_vertprog_parse_instruction(vpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; - vp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - FREE(vpc); -} - -static boolean -nv20_vertprog_validate(struct nv20_context *nv20) -{ - struct pipe_screen *pscreen = nv20->pipe.screen; - struct nouveau_winsys *nvws = nv20->nvws; - struct nouveau_grobj *rankine = nv20->screen->rankine; - struct nv20_vertex_program *vp; - struct pipe_buffer *constbuf; - boolean upload_code = FALSE, 
upload_data = FALSE; - int i; - - vp = nv20->vertprog; - constbuf = nv20->constbuf[PIPE_SHADER_VERTEX]; - - /* Translate TGSI shader into hw bytecode */ - if (!vp->translated) { - nv20_vertprog_translate(nv20, vp); - if (!vp->translated) - return FALSE; - } - - /* Allocate hw vtxprog exec slots */ - if (!vp->exec) { - struct nouveau_resource *heap = nv20->screen->vp_exec_heap; - struct nouveau_stateobj *so; - uint vplen = vp->nr_insns; - - if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { - while (heap->next && heap->size < vplen) { - struct nv20_vertex_program *evict; - - evict = heap->next->priv; - nvws->res_free(&evict->exec); - } - - if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) - assert(0); - } - - so = so_new(2, 0); - so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); - so_data (so, vp->exec->start); - so_ref(so, &vp->so); - - upload_code = TRUE; - } - - /* Allocate hw vtxprog const slots */ - if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv20->screen->vp_data_heap; - - if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { - while (heap->next && heap->size < vp->nr_consts) { - struct nv20_vertex_program *evict; - - evict = heap->next->priv; - nvws->res_free(&evict->data); - } - - if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) - assert(0); - } - - /*XXX: handle this some day */ - assert(vp->data->start >= vp->data_start_min); - - upload_data = TRUE; - if (vp->data_start != vp->data->start) - upload_code = TRUE; - } - - /* If exec or data segments moved we need to patch the program to - * fixup offsets and register IDs. 
- */ - if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv20_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->has_branch_offset) { - assert(0); - } - } - - vp->exec_start = vp->exec->start; - } - - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv20_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NV30_VP_INST_CONST_SRC_SHIFT; - - } - } - - vp->data_start = vp->data->start; - } - - /* Update + Upload constant values */ - if (vp->nr_consts) { - float *map = NULL; - - if (constbuf) { - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - } - - for (i = 0; i < vp->nr_consts; i++) { - struct nv20_vertex_program_data *vpd = &vp->consts[i]; - - if (vpd->index >= 0) { - if (!upload_data && - !memcmp(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float))) - continue; - memcpy(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float)); - } - - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); - } - - if (constbuf) - pipe_buffer_unmap(pscreen, constbuf); - } - - /* Upload vtxprog */ - if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", - i, vp->insns[i].data[0], vp->insns[i].data[1], - vp->insns[i].data[2], vp->insns[i].data[3]); - } -#endif - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); - for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); - } - } - - if (vp->so != nv20->state.hw[NV30_STATE_VERTPROG]) { - so_ref(vp->so, &nv20->state.hw[NV30_STATE_VERTPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv20_vertprog_destroy(struct nv20_context *nv20, struct 
nv20_vertex_program *vp) -{ - struct nouveau_winsys *nvws = nv20->screen->nvws; - - vp->translated = FALSE; - - if (vp->nr_insns) { - FREE(vp->insns); - vp->insns = NULL; - vp->nr_insns = 0; - } - - if (vp->nr_consts) { - FREE(vp->consts); - vp->consts = NULL; - vp->nr_consts = 0; - } - - nvws->res_free(&vp->exec); - vp->exec_start = 0; - nvws->res_free(&vp->data); - vp->data_start = 0; - vp->data_start_min = 0; - - vp->ir = vp->or = 0; - so_ref(NULL, &vp->so); -} - -struct nv20_state_entry nv20_state_vertprog = { - .validate = nv20_vertprog_validate, - .dirty = { - .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, - .hw = NV30_STATE_VERTPROG, - } -}; diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 54572e9ab3a..8bfd7b2c909 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -1,6 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "nv30_context.h" #include "nv30_screen.h" @@ -43,7 +43,7 @@ nv30_destroy(struct pipe_context *pipe) } struct pipe_context * -nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) +nv30_create(struct pipe_screen *pscreen, void *priv) { struct nv30_screen *screen = nv30_screen(pscreen); struct pipe_winsys *ws = pscreen->winsys; @@ -54,12 +54,12 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) if (!nv30) return NULL; nv30->screen = screen; - nv30->pctx_id = pctx_id; nv30->nvws = nvws; nv30->pipe.winsys = ws; nv30->pipe.screen = pscreen; + nv30->pipe.priv = priv; nv30->pipe.destroy = nv30_destroy; nv30->pipe.draw_arrays = nv30_draw_arrays; nv30->pipe.draw_elements = nv30_draw_elements; diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index e59449287b5..b3b26f7f94a 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -1,6 +1,8 @@ 
#ifndef __NV30_CONTEXT_H__ #define __NV30_CONTEXT_H__ +#include <stdio.h> + #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -8,6 +10,7 @@ #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_inlines.h" #include "draw/draw_vertex.h" @@ -108,7 +111,6 @@ struct nv30_context { struct nouveau_winsys *nvws; struct nv30_screen *screen; - unsigned pctx_id; struct draw_context *draw; @@ -206,4 +208,8 @@ extern void nv30_draw_elements(struct pipe_context *pipe, extern void nv30_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); +/* nv30_context.c */ +struct pipe_context * +nv30_create(struct pipe_screen *pscreen, void *priv); + #endif diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 2d565cb631b..2c432c6dfa7 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -1,7 +1,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 98935678911..0cc3172dcd5 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -43,7 +43,6 @@ static struct nv30_texture_format * nv30_fragtex_format(uint pipe_format) { struct nv30_texture_format *tf = nv30_texture_formats; - char fs[128]; while (tf->defined) { if (tf->pipe == pipe_format) @@ -65,7 +64,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer); struct nv30_texture_format *tf; struct nouveau_stateobj *so; - uint32_t txf, txs , txp; + uint32_t txf, txs; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; tf = nv30_fragtex_format(pt->format); @@ -97,13 +96,6 @@ 
nv30_fragtex_build(struct nv30_context *nv30, int unit) return NULL; } - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - txp = 0; - } else { - txp = nv30mt->level[0].pitch; - txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */; - } - txs = tf->swizzle; so = so_new(1, 8, 2); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 8fbba38e78f..c29c36e20aa 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -1,11 +1,11 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "nv30_context.h" -#include "../nv04/nv04_surface_2d.h" +#include "../nouveau/nv04_surface_2d.h" static void nv30_miptree_layout(struct nv30_miptree *nv30mt) diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 9ed48178dc2..8f9b26ea56f 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -20,9 +20,6 @@ struct nouveau_winsys { struct pipe_screen *pscreen; - unsigned nr_pctx; - struct pipe_context **pctx; - struct pipe_surface *front; }; @@ -67,6 +64,16 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) case NOUVEAU_CAP_HW_VTXBUF: case NOUVEAU_CAP_HW_IDXBUF: return 1; + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 0; + case PIPE_CAP_INDEP_BLEND_FUNC: + return 0; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; @@ -163,9 +170,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen) so_ref(NULL, &screen->state[i]); } - nouveau_resource_free(&screen->vp_exec_heap); - nouveau_resource_free(&screen->vp_data_heap); - 
nouveau_resource_free(&screen->query_heap); + nouveau_resource_destroy(&screen->vp_exec_heap); + nouveau_resource_destroy(&screen->vp_data_heap); + nouveau_resource_destroy(&screen->query_heap); nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->rankine); @@ -202,6 +209,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nv30_screen_get_param; pscreen->get_paramf = nv30_screen_get_paramf; pscreen->is_format_supported = nv30_screen_surface_format_supported; + pscreen->context_create = nv30_create; nv30_screen_init_miptree_functions(pscreen); nv30_screen_init_transfer_functions(pscreen); diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h index 5fbd998b539..8591cd31cab 100644 --- a/src/gallium/drivers/nv30/nv30_screen.h +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -3,14 +3,14 @@ #include "nouveau/nouveau_screen.h" -#include "nv04/nv04_surface_2d.h" +#include "nouveau/nv04_surface_2d.h" struct nv30_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; - unsigned cur_pctx; + struct nv30_context *cur_ctx; /* HW graphics objects */ struct nv04_surface_2d *eng2d; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 66096de61e8..f775938ba79 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -1,6 +1,6 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "tgsi/tgsi_parse.h" @@ -16,27 +16,27 @@ nv30_blend_state_create(struct pipe_context *pipe, struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); struct nouveau_stateobj *so = so_new(5, 8, 0); - if (cso->blend_enable) { + if (cso->rt[0].blend_enable) { so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | - 
nvgl_blend_func(cso->rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | - nvgl_blend_func(cso->rgb_dst_factor)); + so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | + nvgl_blend_func(cso->rt[0].rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rt[0].rgb_dst_factor)); /* FIXME: Gallium assumes GL_EXT_blend_func_separate. It is not the case for NV30 */ so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->rgb_func)); + so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); } else { so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1); so_data (so, 0); } so_method(so, rankine, NV34TCL_COLOR_MASK, 1); - so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | - ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | - ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | - ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_B) ? 
(0x01 << 0) : 0))); if (cso->logicop_enable) { so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); @@ -590,12 +590,12 @@ nv30_set_clip_state(struct pipe_context *pipe, static void nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv30_context *nv30 = nv30_context(pipe); - nv30->constbuf[shader] = buf->buffer; - nv30->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + nv30->constbuf[shader] = buf; + nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); if (shader == PIPE_SHADER_VERTEX) { nv30->dirty |= NV30_NEW_VERTPROG; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index ac52d946f02..d9650f63eb2 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -44,13 +44,15 @@ nv30_state_emit(struct nv30_context *nv30) unsigned i; uint64_t states; - if (nv30->pctx_id != screen->cur_pctx) { + /* XXX: racy! 
+ */ + if (nv30 != screen->cur_ctx) { for (i = 0; i < NV30_STATE_MAX; i++) { if (state->hw[i] && screen->state[i] != state->hw[i]) state->dirty |= (1ULL << i); } - screen->cur_pctx = nv30->pctx_id; + screen->cur_ctx = nv30; } for (i = 0, states = state->dirty; states; i++) { diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c index 5e237e13eb5..b48c5ab51a0 100644 --- a/src/gallium/drivers/nv30/nv30_surface.c +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -28,8 +28,8 @@ #include "nv30_context.h" #include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_simple_screen.h" +#include "util/u_inlines.h" #include "util/u_tile.h" static void diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 65598991c68..554bcbbdd0e 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -1,6 +1,6 @@ #include <pipe/p_state.h> #include <pipe/p_defines.h> -#include <pipe/p_inlines.h> +#include <util/u_inlines.h> #include <util/u_format.h> #include <util/u_memory.h> #include <util/u_math.h> diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index 1c5db03ea24..a83ddf11546 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -1,6 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "nv30_context.h" #include "nv30_state.h" @@ -223,7 +223,6 @@ nv30_draw_arrays(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static INLINE void @@ -382,7 +381,7 @@ nv30_draw_elements_inline(struct pipe_context *pipe, map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { NOUVEAU_ERR("failed mapping ib\n"); - return FALSE; + return; } switch (ib_size) { @@ -424,7 +423,7 @@ nv30_draw_elements_vbo(struct pipe_context 
*pipe, FIRE_RING(chan); continue; } - + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); @@ -468,7 +467,7 @@ nv30_draw_elements(struct pipe_context *pipe, if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return; + return; } if (idxbuf) { diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index e77a5be3f23..809be3712da 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -1,7 +1,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index f79ae4db84e..b0b90032de1 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -1,6 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "nv40_context.h" #include "nv40_screen.h" @@ -43,7 +43,7 @@ nv40_destroy(struct pipe_context *pipe) } struct pipe_context * -nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) +nv40_create(struct pipe_screen *pscreen, void *priv) { struct nv40_screen *screen = nv40_screen(pscreen); struct pipe_winsys *ws = pscreen->winsys; @@ -54,11 +54,11 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) if (!nv40) return NULL; nv40->screen = screen; - nv40->pctx_id = pctx_id; nv40->nvws = nvws; nv40->pipe.winsys = ws; + nv40->pipe.priv = priv; nv40->pipe.screen = pscreen; nv40->pipe.destroy = nv40_destroy; nv40->pipe.draw_arrays = nv40_draw_arrays; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index e219bb537ac..958a48f2a4a 100644 --- 
a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -1,6 +1,8 @@ #ifndef __NV40_CONTEXT_H__ #define __NV40_CONTEXT_H__ +#include <stdio.h> + #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -8,6 +10,7 @@ #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_inlines.h" #include "draw/draw_vertex.h" @@ -108,7 +111,6 @@ struct nv40_context { struct nouveau_winsys *nvws; struct nv40_screen *screen; - unsigned pctx_id; struct draw_context *draw; @@ -227,4 +229,8 @@ extern void nv40_draw_elements(struct pipe_context *pipe, extern void nv40_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); +/* nv40_context.c */ +struct pipe_context * +nv40_create(struct pipe_screen *pscreen, void *priv); + #endif diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index d826f8c2f5f..60ab49fad1c 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -1,5 +1,5 @@ #include "pipe/p_shader_tokens.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_pack_color.h" @@ -271,7 +271,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, map = pipe_buffer_map(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, + draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, 0, map, nr); } diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 1237066c398..dc24f9b08a5 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -1,7 +1,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git 
a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 89bd155ff49..ad1a9a51952 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -1,11 +1,11 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "nv40_context.h" -#include "../nv04/nv04_surface_2d.h" +#include "../nouveau/nv04_surface_2d.h" diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 9e55e5a089c..001147e752f 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -52,6 +52,16 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param) if (screen->curie->grclass == NV40TCL) return 1; return 0; + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 0; + case PIPE_CAP_INDEP_BLEND_FUNC: + return 0; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; @@ -147,9 +157,9 @@ nv40_screen_destroy(struct pipe_screen *pscreen) so_ref(NULL, &screen->state[i]); } - nouveau_resource_free(&screen->vp_exec_heap); - nouveau_resource_free(&screen->vp_data_heap); - nouveau_resource_free(&screen->query_heap); + nouveau_resource_destroy(&screen->vp_exec_heap); + nouveau_resource_destroy(&screen->vp_data_heap); + nouveau_resource_destroy(&screen->query_heap); nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->curie); @@ -186,6 +196,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nv40_screen_get_param; pscreen->get_paramf = nv40_screen_get_paramf; pscreen->is_format_supported = 
nv40_screen_surface_format_supported; + pscreen->context_create = nv40_create; nv40_screen_init_miptree_functions(pscreen); nv40_screen_init_transfer_functions(pscreen); diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h index 57b4c8fc46c..9437aa050d4 100644 --- a/src/gallium/drivers/nv40/nv40_screen.h +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -2,14 +2,14 @@ #define __NV40_SCREEN_H__ #include "nouveau/nouveau_screen.h" -#include "nv04/nv04_surface_2d.h" +#include "nouveau/nv04_surface_2d.h" struct nv40_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; - unsigned cur_pctx; + struct nv40_context *cur_ctx; /* HW graphics objects */ struct nv04_surface_2d *eng2d; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 5084c48eeb4..51b40e51e49 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -1,6 +1,6 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "draw/draw_context.h" @@ -18,26 +18,26 @@ nv40_blend_state_create(struct pipe_context *pipe, struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); struct nouveau_stateobj *so = so_new(5, 8, 0); - if (cso->blend_enable) { + if (cso->rt[0].blend_enable) { so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | - nvgl_blend_func(cso->rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | - nvgl_blend_func(cso->rgb_dst_factor)); + so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | + nvgl_blend_func(cso->rt[0].rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rt[0].rgb_dst_factor)); so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 | - nvgl_blend_eqn(cso->rgb_func)); + so_data (so, 
nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | + nvgl_blend_eqn(cso->rt[0].rgb_func)); } else { so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); so_data (so, 0); } so_method(so, curie, NV40TCL_COLOR_MASK, 1); - so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | - ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | - ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | - ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); if (cso->logicop_enable) { so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); @@ -605,12 +605,12 @@ nv40_set_clip_state(struct pipe_context *pipe, static void nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv40_context *nv40 = nv40_context(pipe); - nv40->constbuf[shader] = buf->buffer; - nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + nv40->constbuf[shader] = buf; + nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); if (shader == PIPE_SHADER_VERTEX) { nv40->dirty |= NV40_NEW_VERTPROG; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 13fe854915b..1c4007a129e 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -61,13 +61,15 @@ nv40_state_emit(struct nv40_context *nv40) unsigned i; uint64_t states; - if (nv40->pctx_id != screen->cur_pctx) { + /* XXX: race conditions + */ + if (nv40 != screen->cur_ctx) { for (i = 0; i < NV40_STATE_MAX; i++) { if (state->hw[i] && screen->state[i] != state->hw[i]) state->dirty |= (1ULL << i); } - screen->cur_pctx = nv40->pctx_id; + screen->cur_ctx = nv40; } for (i = 0, states = 
state->dirty; states; i++) { diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c index a596547974a..02ecfd7bbb7 100644 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -27,7 +27,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_tile.h" diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index 791ee6823d3..ee266c6cfb1 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -1,6 +1,6 @@ #include <pipe/p_state.h> #include <pipe/p_defines.h> -#include <pipe/p_inlines.h> +#include <util/u_inlines.h> #include <util/u_format.h> #include <util/u_memory.h> #include <util/u_math.h> diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index a777898f688..1e14edc56a7 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -1,6 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "nv40_context.h" #include "nv40_state.h" @@ -382,7 +382,7 @@ nv40_draw_elements_inline(struct pipe_context *pipe, map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { NOUVEAU_ERR("failed mapping ib\n"); - return FALSE; + return; } switch (ib_size) { @@ -424,7 +424,7 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, FIRE_RING(chan); continue; } - + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 8d80fcad38e..b289eef0fc2 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -1,7 +1,7 @@ #include "pipe/p_context.h" #include 
"pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 5997456e4c9..867bd03e69d 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -22,7 +22,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "nv50_context.h" #include "nv50_screen.h" @@ -34,6 +34,11 @@ nv50_flush(struct pipe_context *pipe, unsigned flags, struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_channel *chan = nv50->screen->base.channel; + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(chan, nv50->screen->tesla, 0x1338, 1); + OUT_RING (chan, 0x20); + } + if (flags & PIPE_FLUSH_FRAME) FIRE_RING(chan); } @@ -67,8 +72,12 @@ nv50_destroy(struct pipe_context *pipe) so_ref(NULL, &nv50->state.vertprog); if (nv50->state.fragprog) so_ref(NULL, &nv50->state.fragprog); - if (nv50->state.programs) - so_ref(NULL, &nv50->state.programs); + if (nv50->state.geomprog) + so_ref(NULL, &nv50->state.geomprog); + if (nv50->state.fp_linkage) + so_ref(NULL, &nv50->state.fp_linkage); + if (nv50->state.gp_linkage) + so_ref(NULL, &nv50->state.gp_linkage); if (nv50->state.vtxfmt) so_ref(NULL, &nv50->state.vtxfmt); if (nv50->state.vtxbuf) @@ -77,12 +86,16 @@ nv50_destroy(struct pipe_context *pipe) so_ref(NULL, &nv50->state.vtxattr); draw_destroy(nv50->draw); + + if (nv50->screen->cur_ctx == nv50) + nv50->screen->cur_ctx = NULL; + FREE(nv50); } struct pipe_context * -nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) +nv50_create(struct pipe_screen *pscreen, void *priv) { struct pipe_winsys *pipe_winsys = pscreen->winsys; struct nv50_screen *screen = nv50_screen(pscreen); @@ -92,15 +105,17 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) 
if (!nv50) return NULL; nv50->screen = screen; - nv50->pctx_id = pctx_id; nv50->pipe.winsys = pipe_winsys; nv50->pipe.screen = pscreen; + nv50->pipe.priv = priv; nv50->pipe.destroy = nv50_destroy; nv50->pipe.draw_arrays = nv50_draw_arrays; + nv50->pipe.draw_arrays_instanced = nv50_draw_arrays_instanced; nv50->pipe.draw_elements = nv50_draw_elements; + nv50->pipe.draw_elements_instanced = nv50_draw_elements_instanced; nv50->pipe.clear = nv50_clear; nv50->pipe.flush = nv50_flush; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index cbd4c3ff86d..14cef4c0bf8 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -1,6 +1,7 @@ #ifndef __NV50_CONTEXT_H__ #define __NV50_CONTEXT_H__ +#include <stdio.h> #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -8,6 +9,7 @@ #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_inlines.h" #include "draw/draw_vertex.h" @@ -29,9 +31,7 @@ #define NV50_CB_PVP 1 #define NV50_CB_PFP 2 #define NV50_CB_PGP 3 -#define NV50_CB_TIC 4 -#define NV50_CB_TSC 5 -#define NV50_CB_PUPLOAD 6 +#define NV50_CB_AUX 4 #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_ZSA (1 << 1) @@ -45,9 +45,11 @@ #define NV50_NEW_VERTPROG_CB (1 << 9) #define NV50_NEW_FRAGPROG (1 << 10) #define NV50_NEW_FRAGPROG_CB (1 << 11) -#define NV50_NEW_ARRAYS (1 << 12) -#define NV50_NEW_SAMPLER (1 << 13) -#define NV50_NEW_TEXTURE (1 << 14) +#define NV50_NEW_GEOMPROG (1 << 12) +#define NV50_NEW_GEOMPROG_CB (1 << 13) +#define NV50_NEW_ARRAYS (1 << 14) +#define NV50_NEW_SAMPLER (1 << 15) +#define NV50_NEW_TEXTURE (1 << 16) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -129,10 +131,13 @@ struct nv50_state { unsigned miptree_nr[PIPE_SHADER_TYPES]; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; - struct nouveau_stateobj *programs; + struct nouveau_stateobj *geomprog; + struct nouveau_stateobj *fp_linkage; + 
struct nouveau_stateobj *gp_linkage; struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; + struct nouveau_stateobj *instbuf; unsigned vtxelt_nr; }; @@ -140,7 +145,6 @@ struct nv50_context { struct pipe_context pipe; struct nv50_screen *screen; - unsigned pctx_id; struct draw_context *draw; @@ -157,6 +161,7 @@ struct nv50_context { struct pipe_framebuffer_state framebuffer; struct nv50_program *vertprog; struct nv50_program *fragprog; + struct nv50_program *geomprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; @@ -193,11 +198,22 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); +extern void nv50_draw_arrays_instanced(struct pipe_context *, unsigned mode, + unsigned start, unsigned count, + unsigned startInstance, + unsigned instanceCount); extern void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); +extern void nv50_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); extern void nv50_vbo_validate(struct nv50_context *nv50); /* nv50_clear.c */ @@ -207,7 +223,9 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, /* nv50_program.c */ extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); -extern void nv50_linkage_validate(struct nv50_context *nv50); +extern void nv50_geomprog_validate(struct nv50_context *nv50); +extern void nv50_fp_linkage_validate(struct nv50_context *nv50); +extern void nv50_gp_linkage_validate(struct nv50_context *nv50); extern void 
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); @@ -231,4 +249,8 @@ nv50_upload_sifc(struct nv50_context *nv50, void *src, unsigned src_format, int src_pitch, int x, int y, int w, int h, int cpp); +/* nv50_context.c */ +struct pipe_context * +nv50_create(struct pipe_screen *pscreen, void *priv); + #endif diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3f1edf0a139..7297c74a83c 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -22,7 +22,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "nv50_context.h" @@ -92,12 +92,23 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) case PIPE_FORMAT_Z24S8_UNORM: tile_flags = 0x1800; break; + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x6c00; + break; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8Z24_UNORM: tile_flags = 0x2800; break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + tile_flags = 0x7400; + break; default: - tile_flags = 0x7000; + if ((pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) && + util_format_get_blocksizebits(pt->format) == 32) + tile_flags = 0x7a00; + else + tile_flags = 0x7000; break; } @@ -145,7 +156,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); FREE(mt); return NULL; @@ -188,7 +199,7 @@ nv50_miptree_destroy(struct pipe_texture *pt) struct nv50_miptree *mt = nv50_miptree(pt); unsigned l; - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); nouveau_bo_ref(NULL, &mt->base.bo); diff --git a/src/gallium/drivers/nv50/nv50_program.c 
b/src/gallium/drivers/nv50/nv50_program.c index 593d7436037..2372cbbef69 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -23,7 +23,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" @@ -92,6 +92,11 @@ struct nv50_reg { int rhw; /* result hw for FP outputs, or interpolant index */ int acc; /* instruction where this reg is last read (first insn == 1) */ + + int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */ + int indirect[2]; /* index into pc->addr, or -1 */ + + ubyte buf_index; /* c{0 .. 15}[] or g{0 .. 15}[] */ }; #define NV50_MOD_NEG 1 @@ -135,7 +140,8 @@ struct nv50_pc { int immd_nr; struct nv50_reg **addr; int addr_nr; - uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ + struct nv50_reg *sysval; + int sysval_nr; struct nv50_reg *temp_temp[16]; struct nv50_program_exec *temp_temp_exec[16]; @@ -171,6 +177,8 @@ struct nv50_pc { uint8_t edgeflag_out; }; +static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *); + static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) { @@ -179,7 +187,10 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) reg->hw = hw; reg->mod = 0; reg->rhw = -1; + reg->vtx = -1; reg->acc = 0; + reg->indirect[0] = reg->indirect[1] = -1; + reg->buf_index = (type == P_CONST) ? 
1 : 0; } static INLINE unsigned @@ -197,7 +208,8 @@ terminate_mbb(struct nv50_pc *pc) /* remove records of temporary address register values */ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - pc->r_addr[i].rhw = -1; + if (pc->r_addr[i].index < 0) + pc->r_addr[i].acc = 0; } static void @@ -260,6 +272,7 @@ reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) if (reg) { alloc_reg(pc, reg); *ri = *reg; + reg->indirect[0] = reg->indirect[1] = -1; reg->mod = 0; } return ri; @@ -464,6 +477,12 @@ is_join(struct nv50_program_exec *e) return FALSE; } +static INLINE boolean +is_control_flow(struct nv50_program_exec *e) +{ + return (e->inst[0] & 2); +} + static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) @@ -525,11 +544,33 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) static INLINE void set_addr(struct nv50_program_exec *e, struct nv50_reg *a) { + assert(a->type == P_ADDR); + assert(!(e->inst[0] & 0x0c000000)); assert(!(e->inst[1] & 0x00000004)); e->inst[0] |= (a->hw & 3) << 26; - e->inst[1] |= (a->hw >> 2) << 2; + e->inst[1] |= a->hw & 4; +} + +static void +emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t); + +static void +emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int); + +static void +emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[1] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + set_addr(e, src); + + emit(pc, e); } static void @@ -548,72 +589,6 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, e); } -static struct nv50_reg * -alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) -{ - struct nv50_reg *a_tgsi = NULL, *a = NULL; - int i; - uint8_t avail = ~pc->addr_alloc; - - if (!ref) { - /* allocate for TGSI_FILE_ADDRESS */ - while (avail) { - i = ffs(avail) - 1; - - if (pc->r_addr[i].rhw < 0 || - pc->r_addr[i].acc 
!= pc->insn_cur) { - pc->addr_alloc |= (1 << i); - - pc->r_addr[i].rhw = -1; - pc->r_addr[i].index = i; - return &pc->r_addr[i]; - } - avail &= ~(1 << i); - } - assert(0); - return NULL; - } - - /* Allocate and set an address reg so we can access 'ref'. - * - * If and r_addr->index will be -1 or the hw index the value - * value in rhw is relative to. If rhw < 0, the reg has not - * been initialized or is in use for TGSI_FILE_ADDRESS. - */ - while (avail) { /* only consider regs that are not TGSI */ - i = ffs(avail) - 1; - avail &= ~(1 << i); - - if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) { - /* prefer an usused reg with low hw index */ - a = &pc->r_addr[i]; - continue; - } - if (!a && pc->r_addr[i].acc != pc->insn_cur) - a = &pc->r_addr[i]; - - if (ref->hw - pc->r_addr[i].rhw >= 128) - continue; - - if ((ref->acc >= 0 && pc->r_addr[i].index < 0) || - (ref->acc < 0 && pc->r_addr[i].index == ref->index)) { - pc->r_addr[i].acc = pc->insn_cur; - return &pc->r_addr[i]; - } - } - assert(a); - - if (ref->acc < 0) - a_tgsi = pc->addr[ref->index]; - - emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4); - - a->rhw = ref->hw & ~0x7f; - a->acc = pc->insn_cur; - a->index = a_tgsi ? ref->index : -1; - return a; -} - #define INTERP_LINEAR 0 #define INTERP_FLAT 1 #define INTERP_PERSPECTIVE 2 @@ -657,15 +632,15 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->param.shift = s; e->param.mask = m << (s % 32); - if (src->hw > 127) - set_addr(e, alloc_addr(pc, src)); + if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */ + set_addr(e, get_address_reg(pc, src)); else if (src->acc < 0) { assert(src->type == P_CONST); - set_addr(e, pc->addr[src->index]); + set_addr(e, pc->addr[src->indirect[0]]); } - e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); + e->inst[1] |= (src->buf_index << 22); } /* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! 
*/ @@ -694,6 +669,12 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + /* indirect (vertex base + c) load from p[] */ + e->inst[0] |= 0x01800000; + set_addr(e, get_address_reg(pc, src)); + } } alloc_reg(pc, src); @@ -808,6 +789,11 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + e->inst[0] |= 0x01800000; /* src from p[] */ + set_addr(e, get_address_reg(pc, src)); + } } else if (src->type == P_CONST || src->type == P_IMMD) { struct nv50_reg *temp = temp_temp(pc, e); @@ -832,13 +818,13 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x00800000)); - if (e->inst[0] & 0x01000000) { + if (e->inst[0] & 0x01800000) { struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x00800000)); set_data(pc, src, 0x7f, 16, e); e->inst[0] |= 0x00800000; } @@ -862,13 +848,13 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x01000000)); - if (e->inst[0] & 0x00800000) { + if (e->inst[0] & 0x01800000) { struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x01000000)); set_data(pc, src, 0x7f, 32+14, e); e->inst[0] |= 0x01000000; } @@ -997,11 +983,125 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, e->inst[0] |= dst->hw << 2; e->inst[0] |= s << 16; /* shift left */ - set_src_0_restricted(pc, src, e); + set_src_0(pc, src, e); emit(pc, e); } +static boolean +address_reg_suitable(struct nv50_reg *a, struct nv50_reg *r) +{ + if (!r) + return 
FALSE; + + if (r->vtx != a->vtx) + return FALSE; + if (r->vtx >= 0) + return (r->indirect[1] == a->indirect[1]); + + if (r->hw < a->rhw || (r->hw - a->rhw) >= 128) + return FALSE; + + if (a->index >= 0) + return (a->index == r->indirect[0]); + return (a->indirect[0] == r->indirect[0]); +} + +static void +load_vertex_base(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *a, int shift) +{ + struct nv50_reg mem, *temp; + + ctor_reg(&mem, P_ATTR, -1, dst->vtx); + + assert(dst->type == P_ADDR); + if (!a) { + emit_arl(pc, dst, &mem, 0); + return; + } + temp = alloc_temp(pc, NULL); + + if (shift) { + emit_mov_from_addr(pc, temp, a); + if (shift < 0) + emit_shl_imm(pc, temp, temp, shift); + emit_arl(pc, dst, temp, MAX2(shift, 0)); + } + emit_mov(pc, temp, &mem); + set_addr(pc->p->exec_tail, dst); + + emit_arl(pc, dst, temp, 0); + free_temp(pc, temp); +} + +/* case (ref == NULL): allocate address register for TGSI_FILE_ADDRESS + * case (vtx >= 0, acc >= 0): load vertex base from a[vtx * 4] to $aX + * case (vtx >= 0, acc < 0): load vertex base from s[$aY + vtx * 4] to $aX + * case (vtx < 0, acc >= 0): memory address too high to encode + * case (vtx < 0, acc < 0): get source register for TGSI_FILE_ADDRESS + */ +static struct nv50_reg * +get_address_reg(struct nv50_pc *pc, struct nv50_reg *ref) +{ + int i; + struct nv50_reg *a_ref, *a = NULL; + + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { + if (pc->r_addr[i].acc == 0) + a = &pc->r_addr[i]; /* an unused address reg */ + else + if (address_reg_suitable(&pc->r_addr[i], ref)) { + pc->r_addr[i].acc = pc->insn_cur; + return &pc->r_addr[i]; + } else + if (!a && pc->r_addr[i].index < 0 && + pc->r_addr[i].acc < pc->insn_cur) + a = &pc->r_addr[i]; + } + if (!a) { + /* We'll be able to spill address regs when this + * mess is replaced with a proper compiler ... 
+ */ + NOUVEAU_ERR("out of address regs\n"); + abort(); + return NULL; + } + + /* initialize and reserve for this TGSI instruction */ + a->rhw = 0; + a->index = a->indirect[0] = a->indirect[1] = -1; + a->acc = pc->insn_cur; + + if (!ref) { + a->vtx = -1; + return a; + } + a->vtx = ref->vtx; + + /* now put in the correct value ... */ + + if (ref->vtx >= 0) { + a->indirect[1] = ref->indirect[1]; + + /* For an indirect vertex index, we need to shift address right + * by 2, the address register will contain vtx * 16, we need to + * load from a[vtx * 4]. + */ + load_vertex_base(pc, a, (ref->acc < 0) ? + pc->addr[ref->indirect[1]] : NULL, -2); + } else { + assert(ref->acc < 0 || ref->indirect[0] < 0); + + a->rhw = ref->hw & ~0x7f; + a->indirect[0] = ref->indirect[0]; + a_ref = (ref->acc < 0) ? pc->addr[ref->indirect[0]] : NULL; + + emit_add_addr_imm(pc, a, a_ref, a->rhw * 4); + } + return a; +} + #define NV50_MAX_F32 0x880 #define NV50_MAX_S32 0x08c #define NV50_MAX_U32 0x084 @@ -1629,6 +1729,18 @@ emit_ret(struct nv50_pc *pc, int pred, unsigned cc) emit_control_flow(pc, 0x3, pred, cc); } +static void +emit_prim_cmd(struct nv50_pc *pc, unsigned cmd) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xf0000000 | (cmd << 9); + e->inst[1] = 0xc0000000; + set_long(pc, e); + + emit(pc, e); +} + #define QOP_ADD 0 #define QOP_SUBR 1 #define QOP_SUB 2 @@ -2171,14 +2283,19 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c]; if (!r) { - r = alloc_addr(pc, NULL); - pc->addr[dst->Register.Index * 4 + c] = r; + r = get_address_reg(pc, NULL); + r->index = dst->Register.Index * 4 + c; + pc->addr[r->index] = r; } assert(r); return r; } case TGSI_FILE_NULL: return NULL; + case TGSI_FILE_SYSTEM_VALUE: + assert(pc->sysval[dst->Register.Index].type == P_RESULT); + assert(c == 0); + return &pc->sysval[dst->Register.Index]; default: break; } @@ -2208,6 +2325,18 @@ tgsi_src(struct nv50_pc 
*pc, int chan, const struct tgsi_full_src_register *src, switch (src->Register.File) { case TGSI_FILE_INPUT: r = &pc->attr[src->Register.Index * 4 + c]; + + if (!src->Dimension.Dimension) + break; + r = reg_instance(pc, r); + r->vtx = src->Dimension.Index; + + if (!src->Dimension.Indirect) + break; + swz = tgsi_util_get_src_register_swizzle( + &src->DimIndirect, 0); + r->acc = -1; + r->indirect[1] = src->DimIndirect.Index * 4 + swz; break; case TGSI_FILE_TEMPORARY: r = &pc->temp[src->Register.Index * 4 + c]; @@ -2221,12 +2350,12 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, * use the index field to select the address reg. */ r = reg_instance(pc, NULL); + ctor_reg(r, P_CONST, -1, src->Register.Index * 4 + c); + swz = tgsi_util_get_src_register_swizzle( - &src->Indirect, 0); - ctor_reg(r, P_CONST, - src->Indirect.Index * 4 + swz, - src->Register.Index * 4 + c); + &src->Indirect, 0); r->acc = -1; + r->indirect[0] = src->Indirect.Index * 4 + swz; break; case TGSI_FILE_IMMEDIATE: r = &pc->immd[src->Register.Index * 4 + c]; @@ -2237,6 +2366,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = pc->addr[src->Register.Index * 4 + c]; assert(r); break; + case TGSI_FILE_SYSTEM_VALUE: + assert(c == 0); + r = &pc->sysval[src->Register.Index]; + break; default: assert(0); break; @@ -2273,7 +2406,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r->mod |= mod & NV50_MOD_I32; assert(r); - if (r->acc >= 0 && r != temp) + if (r->acc >= 0 && r->vtx < 0 && r != temp) return reg_instance(pc, r); /* will clear r->mod */ return r; } @@ -2495,10 +2628,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_ARL: - assert(src[0][0]); temp = temp_temp(pc, NULL); - emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); - emit_arl(pc, dst[0], temp, 4); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, temp, src[0][c], -1, + 
CVT_FLOOR | CVT_S32_F32); + emit_arl(pc, dst[c], temp, 4); + } break; case TGSI_OPCODE_BGNLOOP: pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc); @@ -2605,6 +2742,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; terminate_mbb(pc); break; + case TGSI_OPCODE_EMIT: + emit_prim_cmd(pc, 1); + break; case TGSI_OPCODE_ENDIF: pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; @@ -2628,8 +2768,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDPRIM: + emit_prim_cmd(pc, 2); + break; case TGSI_OPCODE_ENDSUB: assert(pc->in_subroutine); + terminate_mbb(pc); pc->in_subroutine = FALSE; break; case TGSI_OPCODE_EX2: @@ -3028,10 +3172,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!is_long(pc->p->exec_tail)) convert_to_long(pc, pc->p->exec_tail); else - if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail)) + if (is_immd(pc->p->exec_tail) || + is_join(pc->p->exec_tail) || + is_control_flow(pc->p->exec_tail)) emit_nop(pc); pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ + + terminate_mbb(pc); break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -3135,7 +3283,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) static unsigned nv50_revdep_reorder(unsigned m[4], unsigned rdep[4]) { - unsigned i, c, x, unsafe; + unsigned i, c, x, unsafe = 0; for (c = 0; c < 4; c++) m[c] = c; @@ -3327,17 +3475,53 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) * value of 0 for back-facing, and 0xffffffff for front-facing. 
*/ static void -load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a) +load_frontfacing(struct nv50_pc *pc, struct nv50_reg *sv) { - struct nv50_reg *one = alloc_immd(pc, 1.0f); + struct nv50_reg *temp = alloc_temp(pc, NULL); + int r_pred = 0; - assert(a->rhw == -1); - alloc_reg(pc, a); /* do this before rhw is set */ - a->rhw = 255; - load_interpolant(pc, a); - emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND); + temp->rhw = 255; + emit_interp(pc, temp, NULL, INTERP_FLAT); - FREE(one); + emit_cvt(pc, sv, temp, r_pred, CVT_ABS | CVT_F32_S32); + + emit_not(pc, temp, temp); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + emit_cvt(pc, sv, temp, -1, CVT_F32_S32); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + + free_temp(pc, temp); +} + +static void +load_instance_id(struct nv50_pc *pc, unsigned index) +{ + struct nv50_reg reg, mem; + + ctor_reg(®, P_TEMP, -1, -1); + ctor_reg(&mem, P_CONST, -1, 24); /* startInstance */ + mem.buf_index = 2; + + emit_add_b32(pc, ®, &pc->sysval[index], &mem); + pc->sysval[index] = reg; +} + +static void +copy_semantic_info(struct nv50_program *p) +{ + unsigned i, id; + + for (i = 0; i < p->cfg.in_nr; ++i) { + id = p->cfg.in[i].id; + p->cfg.in[i].sn = p->info.input_semantic_name[id]; + p->cfg.in[i].si = p->info.input_semantic_index[id]; + } + + for (i = 0; i < p->cfg.out_nr; ++i) { + id = p->cfg.out[i].id; + p->cfg.out[i].sn = p->info.output_semantic_name[id]; + p->cfg.out[i].si = p->info.output_semantic_index[id]; + } } static boolean @@ -3346,7 +3530,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct tgsi_parse_context tp; struct nv50_program *p = pc->p; boolean ret = FALSE; - unsigned i, c, flat_nr = 0; + unsigned i, c, instance_id, vertex_id, flat_nr = 0; tgsi_parse_init(&tp, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&tp)) { @@ -3386,13 +3570,13 @@ nv50_program_tx_prep(struct nv50_pc *pc) switch (d->Semantic.Name) { case TGSI_SEMANTIC_BCOLOR: p->cfg.two_side[si].hw = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = 
first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_PSIZE: p->cfg.psiz = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_EDGEFLAG: pc->edgeflag_out = first; @@ -3432,6 +3616,37 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->interp_mode[i] = mode; } break; + case TGSI_FILE_SYSTEM_VALUE: + assert(d->Declaration.Semantic); + switch (d->Semantic.Name) { + case TGSI_SEMANTIC_FACE: + assert(p->type == PIPE_SHADER_FRAGMENT); + load_frontfacing(pc, + &pc->sysval[first]); + break; + case TGSI_SEMANTIC_INSTANCEID: + assert(p->type == PIPE_SHADER_VERTEX); + instance_id = first; + p->cfg.regs[0] |= (1 << 4); + break; + case TGSI_SEMANTIC_PRIMID: + assert(p->type != PIPE_SHADER_VERTEX); + p->cfg.prim_id = first; + break; + /* + case TGSI_SEMANTIC_PRIMIDIN: + assert(p->type == PIPE_SHADER_GEOMETRY); + pc->sysval[first].hw = 6; + p->cfg.regs[0] |= (1 << 8); + break; + case TGSI_SEMANTIC_VERTEXID: + assert(p->type == PIPE_SHADER_VERTEX); + vertex_id = first; + p->cfg.regs[0] |= (1 << 12) | (1 << 0); + break; + */ + } + break; case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_SAMPLER: @@ -3452,36 +3667,65 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - if (p->type == PIPE_SHADER_VERTEX) { + if (p->type == PIPE_SHADER_VERTEX || p->type == PIPE_SHADER_GEOMETRY) { int rid = 0; - for (i = 0; i < pc->attr_nr * 4; ++i) { - if (pc->attr[i].acc) { - pc->attr[i].hw = rid++; - p->cfg.attr[i / 32] |= 1 << (i % 32); + if (p->type == PIPE_SHADER_GEOMETRY) { + for (i = 0; i < pc->attr_nr; ++i) { + p->cfg.in[i].hw = rid; + p->cfg.in[i].id = i; + + for (c = 0; c < 4; ++c) { + int n = i * 4 + c; + if (!pc->attr[n].acc) + continue; + pc->attr[n].hw = rid++; + p->cfg.in[i].mask |= 1 << c; + } + } + } else { + for (i = 0; i < pc->attr_nr * 4; ++i) { + if (pc->attr[i].acc) { + pc->attr[i].hw = rid++; + p->cfg.attr[i / 32] |= 1 << (i % 32); + } + } + if 
(p->cfg.regs[0] & (1 << 0)) + pc->sysval[vertex_id].hw = rid++; + if (p->cfg.regs[0] & (1 << 4)) { + pc->sysval[instance_id].hw = rid++; + load_instance_id(pc, instance_id); } } for (i = 0, rid = 0; i < pc->result_nr; ++i) { - p->cfg.io[i].hw = rid; - p->cfg.io[i].id = i; + p->cfg.out[i].hw = rid; + p->cfg.out[i].id = i; for (c = 0; c < 4; ++c) { int n = i * 4 + c; if (!pc->result[n].acc) continue; pc->result[n].hw = rid++; - p->cfg.io[i].mask |= 1 << c; + p->cfg.out[i].mask |= 1 << c; } } + if (p->cfg.prim_id < 0x40) { + /* GP has to write to PrimitiveID */ + ctor_reg(&pc->sysval[p->cfg.prim_id], + P_RESULT, p->cfg.prim_id, rid); + p->cfg.prim_id = rid++; + } for (c = 0; c < 2; ++c) if (p->cfg.two_side[c].hw < 0x40) - p->cfg.two_side[c] = p->cfg.io[ + p->cfg.two_side[c] = p->cfg.out[ p->cfg.two_side[c].hw]; if (p->cfg.psiz < 0x40) - p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; + p->cfg.psiz = p->cfg.out[p->cfg.psiz].hw; + + copy_semantic_info(p); } else if (p->type == PIPE_SHADER_FRAGMENT) { int rid, aid; @@ -3489,31 +3733,34 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->allow32 = TRUE; - int base = (TGSI_SEMANTIC_POSITION == - p->info.input_semantic_name[0]) ? 0 : 1; + /* do we read FragCoord ? 
*/ + if (pc->attr_nr && + p->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + /* select FCRD components we want accessible */ + for (c = 0; c < 4; ++c) + if (pc->attr[c].acc) + p->cfg.regs[1] |= 1 << (24 + c); + aid = 0; + } else /* offset by 1 if FCRD.w is needed for pinterp */ + aid = popcnt4(p->cfg.regs[1] >> 24); /* non-flat interpolants have to be mapped to * the lower hardware IDs, so sort them: */ for (i = 0; i < pc->attr_nr; i++) { if (pc->interp_mode[i] == INTERP_FLAT) - p->cfg.io[m++].id = i; + p->cfg.in[m++].id = i; else { if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) - p->cfg.io[n].linear = TRUE; - p->cfg.io[n++].id = i; + p->cfg.in[n].linear = TRUE; + p->cfg.in[n++].id = i; } } - - if (!base) /* set w-coordinate mask from perspective interp */ - p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; - - aid = popcnt4( /* if fcrd isn't contained in cfg.io */ - base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); + copy_semantic_info(p); for (n = 0; n < pc->attr_nr; ++n) { - p->cfg.io[n].hw = rid = aid; - i = p->cfg.io[n].id; + p->cfg.in[n].hw = rid = aid; + i = p->cfg.in[n].id; if (p->info.input_semantic_name[n] == TGSI_SEMANTIC_FACE) { @@ -3525,16 +3772,13 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (!pc->attr[i * 4 + c].acc) continue; pc->attr[i * 4 + c].rhw = rid++; - p->cfg.io[n].mask |= 1 << c; + p->cfg.in[n].mask |= 1 << c; load_interpolant(pc, &pc->attr[i * 4 + c]); } - aid += popcnt4(p->cfg.io[n].mask); + aid += popcnt4(p->cfg.in[n].mask); } - if (!base) - p->cfg.regs[1] |= p->cfg.io[0].mask << 24; - m = popcnt4(p->cfg.regs[1] >> 24); /* set count of non-position inputs and of non-flat @@ -3543,32 +3787,33 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->cfg.regs[1] |= aid - m; if (flat_nr) { - i = p->cfg.io[pc->attr_nr - flat_nr].hw; + i = p->cfg.in[pc->attr_nr - flat_nr].hw; p->cfg.regs[1] |= (i - m) << 16; } else p->cfg.regs[1] |= p->cfg.regs[1] << 16; /* mark color semantic for light-twoside */ - n = 0x40; - for (i = 0; i < pc->attr_nr; i++) 
{ - ubyte si, sn; - - sn = p->info.input_semantic_name[p->cfg.io[i].id]; - si = p->info.input_semantic_index[p->cfg.io[i].id]; - - if (sn == TGSI_SEMANTIC_COLOR) { - p->cfg.two_side[si] = p->cfg.io[i]; - - /* increase colour count */ - p->cfg.regs[0] += popcnt4( - p->cfg.two_side[si].mask) << 16; - - n = MIN2(n, p->cfg.io[i].hw - m); + n = 0x80; + for (i = 0; i < p->cfg.in_nr; i++) { + if (p->cfg.in[i].sn == TGSI_SEMANTIC_COLOR) { + n = MIN2(n, p->cfg.in[i].hw - m); + p->cfg.two_side[p->cfg.in[i].si] = p->cfg.in[i]; + + p->cfg.regs[0] += /* increase colour count */ + popcnt4(p->cfg.in[i].mask) << 16; } } - if (n < 0x40) + if (n < 0x80) p->cfg.regs[0] += n; + if (p->cfg.prim_id < 0x40) { + pc->sysval[p->cfg.prim_id].rhw = rid++; + emit_interp(pc, &pc->sysval[p->cfg.prim_id], NULL, + INTERP_FLAT); + /* increase FP_INTERPOLANT_CTRL_COUNT */ + p->cfg.regs[1] += 1; + } + /* Initialize FP results: * FragDepth is always first TGSI and last hw output */ @@ -3622,10 +3867,31 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc->attr); if (pc->temp) FREE(pc->temp); + if (pc->sysval) + FREE(pc->sysval); + if (pc->insn_pos) + FREE(pc->insn_pos); FREE(pc); } +static INLINE uint32_t +nv50_map_gs_output_prim(unsigned pprim) +{ + switch (pprim) { + case PIPE_PRIM_POINTS: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS; + case PIPE_PRIM_LINE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP; + default: + NOUVEAU_ERR("invalid GS_OUTPUT_PRIMITIVE: %u\n", pprim); + abort(); + return 0; + } +} + static boolean ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) { @@ -3639,25 +3905,55 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1; assert(pc->addr_nr <= 2); + pc->sysval_nr = p->info.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; p->cfg.high_temp = 4; p->cfg.two_side[0].hw = 
0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.prim_id = 0x40; p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + for (i = 0; i < p->info.num_properties; ++i) { + unsigned *data = &p->info.properties[i].data[0]; + + switch (p->info.properties[i].name) { + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + p->cfg.prim_type = nv50_map_gs_output_prim(data[0]); + break; + case TGSI_PROPERTY_GS_MAX_VERTICES: + p->cfg.vert_count = data[0]; + break; + default: + break; + } + } + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; p->cfg.clpd = 0x40; - p->cfg.io_nr = pc->result_nr; + p->cfg.out_nr = pc->result_nr; + break; + case PIPE_SHADER_GEOMETRY: + assert(p->cfg.prim_type); + assert(p->cfg.vert_count); + + p->cfg.psiz = 0x80; + p->cfg.clpd = 0x80; + p->cfg.prim_id = 0x80; + p->cfg.out_nr = pc->result_nr; + p->cfg.in_nr = pc->attr_nr; + + p->cfg.two_side[0].hw = 0x80; + p->cfg.two_side[1].hw = 0x80; break; case PIPE_SHADER_FRAGMENT: rtype[0] = rtype[1] = P_TEMP; p->cfg.regs[0] = 0x01000004; - p->cfg.io_nr = pc->attr_nr; + p->cfg.in_nr = pc->attr_nr; if (p->info.writes_z) { p->cfg.regs[2] |= 0x00000100; @@ -3715,7 +4011,16 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) return FALSE; } for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1); + ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1); + + if (pc->sysval_nr) { + pc->sysval = CALLOC(pc->sysval_nr, sizeof(struct nv50_reg *)); + if (!pc->sysval) + return FALSE; + /* will only ever use SYSTEM_VALUE[i].x (hopefully) */ + for (i = 0; i < pc->sysval_nr; ++i) + ctor_reg(&pc->sysval[i], rtype[0], i, -1); + } return TRUE; } @@ -3877,13 +4182,17 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) if (p->param_nr) { unsigned cb; - uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], + uint32_t *map = pipe_buffer_map(pscreen, + nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - - if (p->type == PIPE_SHADER_VERTEX) + switch (p->type) { + case 
PIPE_SHADER_GEOMETRY: cb = NV50_CB_PGP; break; + case PIPE_SHADER_FRAGMENT: cb = NV50_CB_PFP; break; + default: cb = NV50_CB_PVP; - else - cb = NV50_CB_PFP; + assert(p->type == PIPE_SHADER_VERTEX); + break; + } nv50_program_upload_data(nv50, map, 0, p->param_nr, cb); pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]); @@ -3977,19 +4286,18 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(5, 8, 2); + so = so_new(5, 7, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); + NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); + NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); so_data (so, p->cfg.attr[0]); so_data (so, p->cfg.attr[1]); so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); - so_data (so, p->cfg.high_result); //8); + so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1); so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_VP_START_ID, 1); so_data (so, 0); /* program start offset */ @@ -4033,42 +4341,74 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } +void +nv50_geomprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->geomprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(6, 7, 2); + so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 
NV50TCL_GP_REG_ALLOC_TEMP, 1); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1); + so_data (so, p->cfg.prim_type); + so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1); + so_data (so, p->cfg.vert_count); + so_method(so, tesla, NV50TCL_GP_START_ID, 1); + so_data (so, 0); + so_ref(so, &nv50->state.geomprog); + so_ref(NULL, &so); +} + static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { + struct nv50_program *vp; struct nv50_program *fp = nv50->fragprog; - struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; uint32_t origin = 0x00000010; + vp = nv50->geomprog ? nv50->geomprog : nv50->vertprog; + /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in * the VP is PointCoord. */ memset(pntc, 0, 8 * sizeof(uint32_t)); - for (i = 0; i < fp->cfg.io_nr; i++) { - uint8_t sn, si; - uint8_t j, k = fp->cfg.io[i].id; - unsigned n = popcnt4(fp->cfg.io[i].mask); + for (i = 0; i < fp->cfg.in_nr; i++) { + unsigned j, n = popcnt4(fp->cfg.in[i].mask); - if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { + if (fp->cfg.in[i].sn != TGSI_SEMANTIC_GENERIC) { m += n; continue; } - for (j = 0; j < vp->info.num_outputs; ++j) { - sn = vp->info.output_semantic_name[j]; - si = vp->info.output_semantic_index[j]; - - if (sn == fp->info.input_semantic_name[k] && - si == fp->info.input_semantic_index[k]) + for (j = 0; j < vp->cfg.out_nr; ++j) + if (vp->cfg.out[j].sn == fp->cfg.in[i].sn && + vp->cfg.out[j].si == fp->cfg.in[i].si) break; - } if (j < vp->info.num_outputs) { ubyte enable = - (nv50->rasterizer->pipe.sprite_coord_enable >> si) & 1; + (nv50->rasterizer->pipe.sprite_coord_enable >> vp->cfg.out[j].si) & 1; if (enable == 0) { m += n; @@ -4078,7 +4418,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], 
unsigned base) /* this is either PointCoord or replaced by sprite coords */ for (c = 0; c < 4; c++) { - if (!(fp->cfg.io[i].mask & (1 << c))) + if (!(fp->cfg.in[i].mask & (1 << c))) continue; pntc[m / 8] |= (c + 1) << ((m % 8) * 4); ++m; @@ -4088,18 +4428,22 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) } static int -nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], - struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) +nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4], + struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) { int c; uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; - uint8_t *map = (uint8_t *)p_map; + uint8_t *map = (uint8_t *)map32; for (c = 0; c < 4; ++c) { if (mf & 1) { if (fpi->linear == TRUE) lin[mid / 32] |= 1 << (mid % 32); - map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); + if (mv & 1) + map[mid] = oid; + else + map[mid] = (c == 3) ? (zval + 1) : zval; + ++mid; } oid += mv & 1; @@ -4111,34 +4455,42 @@ nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], } void -nv50_linkage_validate(struct nv50_context *nv50) +nv50_fp_linkage_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *vp = nv50->vertprog; struct nv50_program *fp = nv50->fragprog; struct nouveau_stateobj *so; - struct nv50_sreg4 dummy, *vpo; + struct nv50_sreg4 dummy; int i, n, c, m = 0; - uint32_t map[16], lin[4], reg[5], pcrd[8]; + uint32_t map[16], lin[4], reg[6], pcrd[8]; + uint8_t zval = 0x40; + if (nv50->geomprog) { + vp = nv50->geomprog; + zval = 0x80; + } memset(map, 0, sizeof(map)); memset(lin, 0, sizeof(lin)); reg[1] = 0x00000004; /* low and high clip distance map ids */ reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ reg[3] = 0x00000000; /* point size map id & enable */ + reg[5] = 0x00000000; /* primitive ID map slot */ reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ reg[4] = fp->cfg.regs[1]; /* interpolant info */ 
dummy.linear = FALSE; dummy.mask = 0xf; /* map all components of HPOS */ - m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); + m = nv50_vec4_map(map, m, zval, lin, &dummy, &vp->cfg.out[0]); dummy.mask = 0x0; if (vp->cfg.clpd < 0x40) { - for (c = 0; c < vp->cfg.clpd_nr; ++c) - map[m++] = vp->cfg.clpd + c; + for (c = 0; c < vp->cfg.clpd_nr; ++c) { + map[m / 4] |= (vp->cfg.clpd + c) << ((m % 4) * 8); + ++m; + } reg[1] = (m << 8); } @@ -4146,35 +4498,37 @@ nv50_linkage_validate(struct nv50_context *nv50) /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ if (nv50->rasterizer->pipe.light_twoside) { - vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 *vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 *fpi = &fp->cfg.two_side[0]; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[0], &vpo[0]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[1], &vpo[1]); } reg[0] += m - 4; /* adjust FFC0 id */ reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ - for (i = 0; i < fp->cfg.io_nr; i++) { - ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id]; - ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id]; - - /* position must be mapped first */ - assert(i == 0 || sn != TGSI_SEMANTIC_POSITION); - + for (i = 0; i < fp->cfg.in_nr; i++) { /* maybe even remove these from cfg.io */ - if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE) + if (fp->cfg.in[i].sn == TGSI_SEMANTIC_POSITION || + fp->cfg.in[i].sn == TGSI_SEMANTIC_FACE) continue; - /* VP outputs and vp->cfg.io are in the same order */ - for (n = 0; n < vp->info.num_outputs; ++n) { - if (vp->info.output_semantic_name[n] == sn && - vp->info.output_semantic_index[n] == si) + for (n = 0; n < vp->cfg.out_nr; ++n) + if (vp->cfg.out[n].sn == fp->cfg.in[i].sn && + vp->cfg.out[n].si == fp->cfg.in[i].si) break; - } - vpo = (n < vp->info.num_outputs) ? 
&vp->cfg.io[n] : &dummy; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); + m = nv50_vec4_map(map, m, zval, lin, &fp->cfg.in[i], + (n < vp->cfg.out_nr) ? + &vp->cfg.out[n] : &dummy); + } + /* PrimitiveID either is replaced by the system value, or + * written by the geometry shader into an output register + */ + if (fp->cfg.prim_id < 0x40) { + map[m / 4] |= vp->cfg.prim_id << ((m % 4) * 8); + reg[5] = m++; } if (nv50->rasterizer->pipe.point_size_per_vertex) { @@ -4182,14 +4536,28 @@ nv50_linkage_validate(struct nv50_context *nv50) reg[3] = (m++ << 4) | 1; } - /* now fill the stateobj */ - so = so_new(7, 57, 0); + /* now fill the stateobj (at most 28 so_data) */ + so = so_new(10, 54, 0); n = (m + 3) / 4; - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); - so_data (so, m); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); - so_datap (so, map, n); + assert(m <= 32); + if (vp->type == PIPE_SHADER_GEOMETRY) { + so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n); + so_datap (so, map, n); + } else { + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0]); + + so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1); + so_data (so, reg[5]); + + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); + so_datap (so, map, n); + } so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); so_datap (so, reg, 4); @@ -4209,8 +4577,77 @@ nv50_linkage_validate(struct nv50_context *nv50) so_datap (so, pcrd, 8); } - so_ref(so, &nv50->state.programs); - so_ref(NULL, &so); + so_method(so, tesla, NV50TCL_GP_ENABLE, 1); + so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 
1 : 0); + + so_ref(so, &nv50->state.fp_linkage); + so_ref(NULL, &so); +} + +static int +construct_vp_gp_mapping(uint32_t *map32, int m, + struct nv50_program *vp, struct nv50_program *gp) +{ + uint8_t *map = (uint8_t *)map32; + int i, j, c; + + for (i = 0; i < gp->cfg.in_nr; ++i) { + uint8_t oid, mv = 0, mg = gp->cfg.in[i].mask; + + for (j = 0; j < vp->cfg.out_nr; ++j) { + if (vp->cfg.out[j].sn == gp->cfg.in[i].sn && + vp->cfg.out[j].si == gp->cfg.in[i].si) { + mv = vp->cfg.out[j].mask; + oid = vp->cfg.out[j].hw; + break; + } + } + + for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) { + if (mg & mv & 1) + map[m++] = oid; + else + if (mg & 1) + map[m++] = (c == 3) ? 0x41 : 0x40; + oid += mv & 1; + } + } + return m; +} + +void +nv50_gp_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *gp = nv50->geomprog; + uint32_t map[16]; + int m = 0; + + if (!gp) { + so_ref(NULL, &nv50->state.gp_linkage); + return; + } + memset(map, 0, sizeof(map)); + + m = construct_vp_gp_mapping(map, m, vp, gp); + + so = so_new(3, 24 - 3, 0); + + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0] | gp->cfg.regs[0]); + + assert(m <= 32); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + + m = (m + 3) / 4; + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m); + so_datap (so, map, m); + + so_ref(so, &nv50->state.gp_linkage); + so_ref(NULL, &so); } void @@ -4227,6 +4664,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) nouveau_bo_ref(NULL, &p->bo); + FREE(p->immd); nouveau_resource_free(&p->data[0]); p->translated = 0; diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 461fec1d89c..1e3ad6bff05 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -16,11 +16,13 @@ struct 
nv50_program_exec { }; struct nv50_sreg4 { - uint8_t hw; - uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */ + uint8_t hw; /* hw index, nv50 wants flat FP inputs last */ + uint8_t id; /* tgsi index */ uint8_t mask; boolean linear; + + ubyte sn, si; /* semantic name & index */ }; struct nv50_program { @@ -49,16 +51,24 @@ struct nv50_program { uint32_t regs[4]; /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */ - unsigned io_nr; - struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS]; + unsigned in_nr, out_nr; + struct nv50_sreg4 in[PIPE_MAX_SHADER_INPUTS]; + struct nv50_sreg4 out[PIPE_MAX_SHADER_OUTPUTS]; /* FP colour inputs, VP/GP back colour outputs */ struct nv50_sreg4 two_side[2]; - /* VP only */ + /* GP only */ + unsigned vert_count; + uint8_t prim_type; + + /* VP & GP only */ uint8_t clpd, clpd_nr; uint8_t psiz; uint8_t edgeflag_in; + + /* FP & GP only */ + uint8_t prim_id; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 5a4ab3508b8..57b16a355dc 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -21,7 +21,7 @@ */ #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "nv50_context.h" @@ -48,7 +48,7 @@ nv50_query_create(struct pipe_context *pipe, unsigned type) assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER); q->type = type; - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 256, + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 256, 16, &q->bo); if (ret) { FREE(q); @@ -95,11 +95,13 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) MARK_RING (chan, 5, 2); /* flush on lack of space or relocs */ BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4); - OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RELOCl(chan, 
q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); OUT_RING (chan, 0x00000000); OUT_RING (chan, 0x0100f002); - FIRE_RING (chan); + + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1); + OUT_RING (chan, 0); } static boolean @@ -123,6 +125,35 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, return q->ready; } +static void +nv50_render_condition(struct pipe_context *pipe, + struct pipe_query *pq, uint mode) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_query *q; + + if (!pq) { + BEGIN_RING(chan, tesla, NV50TCL_COND_MODE, 1); + OUT_RING (chan, NV50TCL_COND_MODE_ALWAYS); + return; + } + q = nv50_query(pq); + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + /* XXX: big fence, FIFO semaphore might be better */ + BEGIN_RING(chan, tesla, 0x0110, 1); + OUT_RING (chan, 0); + } + + BEGIN_RING(chan, tesla, NV50TCL_COND_ADDRESS_HIGH, 3); + OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RING (chan, NV50TCL_COND_MODE_RES); +} + void nv50_init_query_functions(struct nv50_context *nv50) { @@ -131,4 +162,5 @@ nv50_init_query_functions(struct nv50_context *nv50) nv50->pipe.begin_query = nv50_query_begin; nv50->pipe.end_query = nv50_query_end; nv50->pipe.get_query_result = nv50_query_result; + nv50->pipe.render_condition = nv50_render_condition; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 28e2b35deaa..8c4478e483b 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -135,6 +135,16 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case NOUVEAU_CAP_HW_IDXBUF: return 0; + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 1; + case PIPE_CAP_INDEP_BLEND_FUNC: + return 0; + case 
PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; @@ -167,7 +177,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) struct nv50_screen *screen = nv50_screen(pscreen); unsigned i; - for (i = 0; i < 2; i++) { + for (i = 0; i < 3; i++) { if (screen->constbuf_parm[i]) nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); } @@ -185,6 +195,9 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_grobj_free(&screen->tesla); nouveau_grobj_free(&screen->eng2d); nouveau_grobj_free(&screen->m2mf); + nouveau_resource_destroy(&screen->immd_heap[0]); + nouveau_resource_destroy(&screen->parm_heap[0]); + nouveau_resource_destroy(&screen->parm_heap[1]); nouveau_screen_fini(&screen->base); FREE(screen); } @@ -238,6 +251,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->is_format_supported = nv50_screen_is_format_supported; + pscreen->context_create = nv50_create; screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map; nv50_screen_init_miptree_functions(pscreen); @@ -329,7 +343,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(40, 84, 20); + so = so_new(47, 95, 24); so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); so_data (so, NV50TCL_COND_MODE_ALWAYS); @@ -352,10 +366,11 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); - so_data (so, 0x54); - so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); - so_data (so, 0x54); + for (i = 0; i < 3; ++i) { + 
so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(i), 1); + so_data (so, 0x54); + } + /* origin is top left (set to 1 for bottom left) */ so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); @@ -370,8 +385,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - for (i = 0; i < 2; i++) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4, + for (i = 0; i < 3; i++) { + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { nv50_screen_destroy(pscreen); @@ -406,22 +421,45 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000021 | (NV50_CB_PMISC << 12)); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000031 | (NV50_CB_PMISC << 12)); + /* bind auxiliary constbuf to immediate data bo */ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_AUX << 16) | 0x00000200); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000201 | (NV50_CB_AUX << 12)); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000221 | (NV50_CB_AUX << 12)); + + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); 
+ so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PVP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000101 | (NV50_CB_PVP << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PGP << 16) | 0x00000800); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000121 | (NV50_CB_PGP << 12)); + + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PFP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index a038a4e3c2a..2687b721277 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -9,7 +9,6 @@ struct nv50_screen { struct nouveau_winsys *nvws; - unsigned cur_pctx; struct nv50_context *cur_ctx; struct nouveau_grobj *tesla; @@ -18,10 +17,12 @@ struct nv50_screen { struct nouveau_notifier *sync; struct nouveau_bo *constbuf_misc[1]; - struct nouveau_bo *constbuf_parm[2]; + struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES]; struct 
nouveau_resource *immd_heap[1]; - struct nouveau_resource *parm_heap[2]; + struct nouveau_resource *parm_heap[PIPE_SHADER_TYPES]; + + struct pipe_buffer *strm_vbuf[16]; struct nouveau_bo *tic; struct nouveau_bo *tsc; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index f19a21d5cc4..7c531b50a50 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -22,7 +22,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "tgsi/tgsi_parse.h" @@ -31,6 +31,23 @@ #include "nouveau/nouveau_stateobj.h" +static INLINE uint32_t +nv50_colormask(unsigned mask) +{ + uint32_t cmask = 0; + + if (mask & PIPE_MASK_R) + cmask |= 0x0001; + if (mask & PIPE_MASK_G) + cmask |= 0x0010; + if (mask & PIPE_MASK_B) + cmask |= 0x0100; + if (mask & PIPE_MASK_A) + cmask |= 0x1000; + + return cmask; +} + static void * nv50_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) @@ -38,28 +55,37 @@ nv50_blend_state_create(struct pipe_context *pipe, struct nouveau_stateobj *so = so_new(5, 24, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); - unsigned cmask = 0, i; + unsigned i, blend_enabled = 0; /*XXX ignored: * - dither */ - if (cso->blend_enable == 0) { - so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + if (cso->independent_blend_enable) { + for (i = 0; i < 8; ++i) { + so_data(so, cso->rt[i].blend_enable); + if (cso->rt[i].blend_enable) + blend_enabled = 1; + } + } else + if (cso->rt[0].blend_enable) { + blend_enabled = 1; for (i = 0; i < 8; i++) - so_data(so, 0); + so_data(so, 1); } else { - so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); for (i = 0; i < 8; i++) - so_data(so, 1); + so_data(so, 0); + } + if (blend_enabled) { so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5); 
- so_data (so, nvgl_blend_eqn(cso->rgb_func)); - so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor)); - so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor)); - so_data (so, nvgl_blend_eqn(cso->alpha_func)); - so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor)); + so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_src_factor)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_dst_factor)); + so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_src_factor)); so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1); - so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_dst_factor)); } if (cso->logicop_enable == 0 ) { @@ -71,17 +97,15 @@ nv50_blend_state_create(struct pipe_context *pipe, so_data (so, nvgl_logicop_func(cso->logicop_func)); } - if (cso->colormask & PIPE_MASK_R) - cmask |= (1 << 0); - if (cso->colormask & PIPE_MASK_G) - cmask |= (1 << 4); - if (cso->colormask & PIPE_MASK_B) - cmask |= (1 << 8); - if (cso->colormask & PIPE_MASK_A) - cmask |= (1 << 12); so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8); - for (i = 0; i < 8; i++) - so_data(so, cmask); + if (cso->independent_blend_enable) + for (i = 0; i < 8; ++i) + so_data(so, nv50_colormask(cso->rt[i].colormask)); + else { + uint32_t cmask = nv50_colormask(cso->rt[0].colormask); + for (i = 0; i < 8; i++) + so_data(so, cmask); + } bso->pipe = *cso; so_ref(so, &bso->so); @@ -531,7 +555,7 @@ nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -563,7 +587,39 @@ nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); + FREE(p); +} + 
+static void * +nv50_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_GEOMETRY; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_GEOMPROG; +} + +static void +nv50_gp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -585,17 +641,21 @@ nv50_set_clip_state(struct pipe_context *pipe, static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv50_context *nv50 = nv50_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv50->constbuf[PIPE_SHADER_VERTEX] = buf; nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; nv50->dirty |= NV50_NEW_FRAGPROG_CB; + } else + if (shader == PIPE_SHADER_GEOMETRY) { + nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf; + nv50->dirty |= NV50_NEW_GEOMPROG_CB; } } @@ -696,6 +756,10 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.bind_fs_state = nv50_fp_state_bind; nv50->pipe.delete_fs_state = nv50_fp_state_delete; + nv50->pipe.create_gs_state = nv50_gp_state_create; + nv50->pipe.bind_gs_state = nv50_gp_state_bind; + nv50->pipe.delete_gs_state = nv50_gp_state_delete; + nv50->pipe.set_blend_color = nv50_set_blend_color; nv50->pipe.set_clip_state = nv50_set_clip_state; nv50->pipe.set_constant_buffer = 
nv50_set_constant_buffer; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f83232f43cf..ee28fa63c14 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -185,10 +185,10 @@ nv50_state_emit(struct nv50_context *nv50) struct nv50_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; - /* I don't want to copy headers from the winsys. */ - screen->cur_ctx = nv50; - - if (nv50->pctx_id != screen->cur_pctx) { + /* XXX: this is racy for multiple contexts active on separate + * threads. + */ + if (screen->cur_ctx != nv50) { if (nv50->state.fb) nv50->state.dirty |= NV50_NEW_FRAMEBUFFER; if (nv50->state.blend) @@ -199,6 +199,8 @@ nv50_state_emit(struct nv50_context *nv50) nv50->state.dirty |= NV50_NEW_VERTPROG; if (nv50->state.fragprog) nv50->state.dirty |= NV50_NEW_FRAGPROG; + if (nv50->state.geomprog) + nv50->state.dirty |= NV50_NEW_GEOMPROG; if (nv50->state.rast) nv50->state.dirty |= NV50_NEW_RASTERIZER; if (nv50->state.blend_colour) @@ -215,7 +217,7 @@ nv50_state_emit(struct nv50_context *nv50) nv50->state.dirty |= NV50_NEW_TEXTURE; if (nv50->state.vtxfmt && nv50->state.vtxbuf) nv50->state.dirty |= NV50_NEW_ARRAYS; - screen->cur_pctx = nv50->pctx_id; + screen->cur_ctx = nv50; } if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER) @@ -228,9 +230,14 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog) + so_emit(chan, nv50->state.geomprog); if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) - so_emit(chan, nv50->state.programs); + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) + so_emit(chan, nv50->state.fp_linkage); + if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG)) + && nv50->state.gp_linkage) + 
so_emit(chan, nv50->state.gp_linkage); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -267,6 +274,9 @@ nv50_state_flush_notify(struct nouveau_channel *chan) so_emit_reloc_markers(chan, nv50->state.fragprog); so_emit_reloc_markers(chan, nv50->state.vtxbuf); so_emit_reloc_markers(chan, nv50->screen->static_init); + + if (nv50->state.instbuf) + so_emit_reloc_markers(chan, nv50->state.instbuf); } boolean @@ -291,9 +301,15 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB)) + nv50_geomprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) - nv50_linkage_validate(nv50); + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) + nv50_fp_linkage_validate(nv50); + + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG)) + nv50_gp_linkage_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); @@ -400,8 +416,9 @@ viewport_uptodate: for (i = 0; i < PIPE_SHADER_TYPES; ++i) nr += nv50->sampler_nr[i]; - so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES - + nr * 8, PIPE_SHADER_TYPES * 2); + so = so_new(1 + 5 * PIPE_SHADER_TYPES, + 1 + 19 * PIPE_SHADER_TYPES + nr * 8, + PIPE_SHADER_TYPES * 2); nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 6378132979e..ac0c1d02703 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -25,8 +25,8 @@ #include "nouveau/nouveau_pushbuf.h" #include "nv50_context.h" #include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_simple_screen.h" +#include 
"util/u_inlines.h" #include "util/u_tile.h" diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index bef548b7286..9f1a1713032 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -155,7 +155,7 @@ static boolean nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so, unsigned p) { - static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 }; + static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2, 1 }; struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_grobj *tesla = nv50->screen->tesla; @@ -220,11 +220,8 @@ nv50_tex_validate(struct nv50_context *nv50) return; } - /* not sure if the following really do what I think: */ so_method(so, tesla, 0x1330, 1); /* flush TIC */ so_data (so, 0); - so_method(so, tesla, 0x1338, 1); /* flush texture caches */ - so_data (so, 0x20); so_ref(so, &nv50->state.tic_upload); so_ref(NULL, &so); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index a2f1db2914c..d08b4d7354c 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -1,6 +1,6 @@ #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f2e510fba61..ca2f8061f32 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -22,7 +22,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" @@ -40,6 +40,8 @@ nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned); static boolean nv50_push_arrays(struct nv50_context *, unsigned, unsigned); +#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16) + static INLINE unsigned nv50_prim(unsigned mode) { 
@@ -55,6 +57,14 @@ nv50_prim(unsigned mode) case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + case PIPE_PRIM_LINES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY; default: break; } @@ -152,6 +162,309 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) return (hw_type | hw_size); } +/* For instanced drawing from user buffers, hitting the FIFO repeatedly + * with the same vertex data is probably worse than uploading all data. + */ +static boolean +nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i) +{ + struct nv50_screen *nscreen = nv50->screen; + struct pipe_screen *pscreen = &nscreen->base.base; + struct pipe_buffer *buf = nscreen->strm_vbuf[i]; + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; + uint8_t *src; + unsigned size = align(vb->buffer->size, 4096); + + if (buf && buf->size < size) + pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL); + + if (!nscreen->strm_vbuf[i]) { + nscreen->strm_vbuf[i] = pipe_buffer_create( + pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size); + buf = nscreen->strm_vbuf[i]; + } + + src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if (!src) + return FALSE; + src += vb->buffer_offset; + + size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */ + if (vb->buffer_offset + size > vb->buffer->size) + size = vb->buffer->size - vb->buffer_offset; + + pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src); + pipe_buffer_unmap(pscreen, vb->buffer); + + vb->buffer = buf; /* don't pipe_reference, this is a private copy */ + return TRUE; +} + +static void 
+nv50_upload_user_vbufs(struct nv50_context *nv50) +{ + unsigned i; + + if (nv50->vbo_fifo) + nv50->dirty |= NV50_NEW_ARRAYS; + if (!(nv50->dirty & NV50_NEW_ARRAYS)) + return; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) { + if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX) + continue; + nv50_upload_vtxbuf(nv50, i); + } +} + +static void +nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + float v[4]; + + util_format_read_4f(nv50->vtxelt[i].src_format, + v, 0, data, 0, 0, 0, 1, 1); + + switch (nv50->vtxelt[i].nr_components) { + case 4: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + OUT_RINGf (chan, v[2]); + OUT_RINGf (chan, v[3]); + break; + case 3: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + OUT_RINGf (chan, v[2]); + break; + case 2: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + break; + case 1: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1); + OUT_RINGf (chan, v[0]); + break; + default: + assert(0); + break; + } +} + +static unsigned +init_per_instance_arrays_immd(struct nv50_context *nv50, + unsigned startInstance, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_bo *bo; + unsigned i, b, count = 0; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + ++count; + b = nv50->vtxelt[i].vertex_buffer_index; + + pos[i] = nv50->vtxelt[i].src_offset + + nv50->vtxbuf[b].buffer_offset + + startInstance * nv50->vtxbuf[b].stride; + step[i] = startInstance % nv50->vtxelt[i].instance_divisor; + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + if (!bo->map) + nouveau_bo_map(bo, NOUVEAU_BO_RD); + + nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); + } + + return count; +} + +static 
unsigned +init_per_instance_arrays(struct nv50_context *nv50, + unsigned startInstance, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct nouveau_bo *bo; + struct nouveau_stateobj *so; + unsigned i, b, count = 0; + const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + if (nv50->vbo_fifo) + return init_per_instance_arrays_immd(nv50, startInstance, + pos, step); + + so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + ++count; + b = nv50->vtxelt[i].vertex_buffer_index; + + pos[i] = nv50->vtxelt[i].src_offset + + nv50->vtxbuf[b].buffer_offset + + startInstance * nv50->vtxbuf[b].stride; + + if (!startInstance) { + step[i] = 0; + continue; + } + step[i] = startInstance % nv50->vtxelt[i].instance_divisor; + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); + } + + if (count && startInstance) { + so_ref (so, &nv50->state.instbuf); /* for flush notify */ + so_emit(chan, nv50->state.instbuf); + } + so_ref (NULL, &so); + + return count; +} + +static void +step_per_instance_arrays_immd(struct nv50_context *nv50, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_bo *bo; + unsigned i, b; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + if (++step[i] != nv50->vtxelt[i].instance_divisor) + continue; + b = nv50->vtxelt[i].vertex_buffer_index; + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + step[i] = 0; + pos[i] += nv50->vtxbuf[b].stride; + + nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); + } +} + +static void +step_per_instance_arrays(struct nv50_context *nv50, + unsigned pos[16], unsigned step[16]) +{ 
+ struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct nouveau_bo *bo; + struct nouveau_stateobj *so; + unsigned i, b; + const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + if (nv50->vbo_fifo) { + step_per_instance_arrays_immd(nv50, pos, step); + return; + } + + so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + b = nv50->vtxelt[i].vertex_buffer_index; + + if (++step[i] == nv50->vtxelt[i].instance_divisor) { + step[i] = 0; + pos[i] += nv50->vtxbuf[b].stride; + } + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); + } + + so_ref (so, &nv50->state.instbuf); /* for flush notify */ + so_ref (NULL, &so); + + so_emit(chan, nv50->state.instbuf); +} + +static INLINE void +nv50_unmap_vbufs(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) + if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) + nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); +} + +void +nv50_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count, + unsigned startInstance, unsigned instanceCount) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->tesla->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + unsigned i, nz_divisors; + unsigned step[16], pos[16]; + + if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) + nv50_upload_user_vbufs(nv50); + + nv50_state_validate(nv50); + + nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); + OUT_RING (chan, NV50_CB_AUX | (24 << 8)); + OUT_RING (chan, startInstance); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 
1); + OUT_RING (chan, nv50_prim(mode)); + + if (nv50->vbo_fifo) + nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + } + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + + for (i = 1; i < instanceCount; i++) { + if (nz_divisors) /* any non-zero array divisors ? */ + step_per_instance_arrays(nv50, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode) | (1 << 28)); + + if (nv50->vbo_fifo) + nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + } + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + } + nv50_unmap_vbufs(nv50); + + so_ref(NULL, &nv50->state.instbuf); +} + void nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -182,6 +495,8 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); + nv50_unmap_vbufs(nv50); + /* XXX: not sure what to do if ret != TRUE: flush and retry? */ assert(ret); @@ -210,7 +525,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -243,7 +558,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 
2046 : count; int i; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -268,7 +583,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -277,6 +592,77 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, return TRUE; } +static INLINE void +nv50_draw_elements_inline(struct nv50_context *nv50, + void *map, unsigned indexSize, + unsigned start, unsigned count) +{ + switch (indexSize) { + case 1: + nv50_draw_elements_inline_u08(nv50, map, start, count); + break; + case 2: + nv50_draw_elements_inline_u16(nv50, map, start, count); + break; + case 4: + nv50_draw_elements_inline_u32(nv50, map, start, count); + break; + } +} + +void +nv50_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count, + unsigned startInstance, unsigned instanceCount) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct pipe_screen *pscreen = pipe->screen; + void *map; + unsigned i, nz_divisors; + unsigned step[16], pos[16]; + + map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + + if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) + nv50_upload_user_vbufs(nv50); + + nv50_state_validate(nv50); + + nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); + OUT_RING (chan, NV50_CB_AUX | (24 << 8)); + OUT_RING (chan, startInstance); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + 
OUT_RING (chan, nv50_prim(mode)); + + nv50_draw_elements_inline(nv50, map, indexSize, start, count); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + + for (i = 1; i < instanceCount; ++i) { + if (nz_divisors) /* any non-zero array divisors ? */ + step_per_instance_arrays(nv50, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode) | (1 << 28)); + + nv50_draw_elements_inline(nv50, map, indexSize, start, count); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + } + nv50_unmap_vbufs(nv50); + + so_ref(NULL, &nv50->state.instbuf); +} + void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -287,7 +673,6 @@ nv50_draw_elements(struct pipe_context *pipe, struct nouveau_grobj *tesla = nv50->screen->tesla; struct pipe_screen *pscreen = pipe->screen; void *map; - boolean ret; map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); @@ -300,29 +685,15 @@ nv50_draw_elements(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(mode)); - switch (indexSize) { - case 1: - ret = nv50_draw_elements_inline_u08(nv50, map, start, count); - break; - case 2: - ret = nv50_draw_elements_inline_u16(nv50, map, start, count); - break; - case 4: - ret = nv50_draw_elements_inline_u32(nv50, map, start, count); - break; - default: - assert(0); - ret = FALSE; - break; - } + + nv50_draw_elements_inline(nv50, map, indexSize, start, count); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); + nv50_unmap_vbufs(nv50); + pipe_buffer_unmap(pscreen, indexBuffer); - - /* XXX: what to do if ret != TRUE? Flush and retry? 
- */ - assert(ret); } static INLINE boolean @@ -335,23 +706,16 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, struct nouveau_stateobj *so; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_bo *bo = nouveau_bo(vb->buffer); - float *v; + float v[4]; int ret; - enum pipe_format pf = ve->src_format; - const struct util_format_description *desc; - - desc = util_format_description(pf); - assert(desc); - - if ((desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT) || - util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0) != 32) - return FALSE; ret = nouveau_bo_map(bo, NOUVEAU_BO_RD); if (ret) return FALSE; - v = (float *)(bo->map + (vb->buffer_offset + ve->src_offset)); + util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map + + (vb->buffer_offset + ve->src_offset), 0, + 0, 0, 1, 1); so = *pso; if (!so) *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); @@ -409,7 +773,7 @@ nv50_vbo_validate(struct nv50_context *nv50) !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; - if (nv50->vertprog->cfg.edgeflag_in < 16) + if (NV50_USING_LOATHED_EDGEFLAG(nv50)) nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); @@ -437,17 +801,20 @@ nv50_vbo_validate(struct nv50_context *nv50) nv50->vbo_fifo &= ~(1 << i); continue; } - so_data(vtxfmt, hw | i); if (nv50->vbo_fifo) { + so_data (vtxfmt, hw | + (ve->instance_divisor ? (1 << 4) : i)); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); so_data (vtxbuf, 0); continue; } + so_data(vtxfmt, hw | i); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); - so_data (vtxbuf, 0x20000000 | vb->stride); + so_data (vtxbuf, 0x20000000 | + (ve->instance_divisor ? 
0 : vb->stride)); so_reloc (vtxbuf, bo, vb->buffer_offset + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -485,7 +852,7 @@ typedef void (*pfn_push)(struct nouveau_channel *, void *); struct nv50_vbo_emitctx { pfn_push push[16]; - void *map[16]; + uint8_t *map[16]; unsigned stride[16]; unsigned nr_ve; unsigned vtx_dwords; @@ -523,19 +890,18 @@ nv50_map_vbufs(struct nv50_context *nv50) for (i = 0; i < nv50->vtxbuf_nr; ++i) { struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; - unsigned size, delta; + unsigned size = vb->stride * (vb->max_index + 1) + 16; if (nouveau_bo(vb->buffer)->map) continue; - size = vb->stride * (vb->max_index + 1); - delta = vb->buffer_offset; - + size = vb->stride * (vb->max_index + 1) + 16; + size = MIN2(size, vb->buffer->size); if (!size) - size = vb->buffer->size - vb->buffer_offset; + size = vb->buffer->size; if (nouveau_bo_map_range(nouveau_bo(vb->buffer), - delta, size, NOUVEAU_BO_RD)) + 0, size, NOUVEAU_BO_RD)) break; } @@ -546,16 +912,6 @@ nv50_map_vbufs(struct nv50_context *nv50) return FALSE; } -static INLINE void -nv50_unmap_vbufs(struct nv50_context *nv50) -{ - unsigned i; - - for (i = 0; i < nv50->vtxbuf_nr; ++i) - if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) - nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); -} - static void emit_b32_1(struct nouveau_channel *chan, void *data) { @@ -650,12 +1006,13 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, ve = &nv50->vtxelt[i]; vb = &nv50->vtxbuf[ve->vertex_buffer_index]; - if (!(nv50->vbo_fifo & (1 << i))) + if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor) continue; n = emit->nr_ve++; emit->stride[n] = vb->stride; - emit->map[n] = nouveau_bo(vb->buffer)->map + + emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map + + vb->buffer_offset + (start * vb->stride + ve->src_offset); desc = util_format_description(ve->src_format); @@ -745,13 +1102,12 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned 
start, unsigned count) set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx_next(chan, &emit); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -772,13 +1128,12 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -799,13 +1154,12 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -826,13 +1180,12 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index c14414fff6b..cdedb302209 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -23,8 +23,6 @@ #include "r300_blit.h" #include "r300_context.h" -#include "util/u_rect.h" - static void r300_blitter_save_states(struct r300_context* r300) { util_blitter_save_blend(r300->blitter, r300->blend_state.state); @@ -75,13 +73,15 @@ void r300_clear(struct pipe_context* pipe, */ struct r300_context* r300 = r300_context(pipe); + struct 
pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; r300_blitter_save_states(r300); util_blitter_clear(r300->blitter, - r300->framebuffer_state.width, - r300->framebuffer_state.height, - r300->framebuffer_state.nr_cbufs, + fb->width, + fb->height, + fb->nr_cbufs, buffers, rgba, depth, stencil); } @@ -99,7 +99,7 @@ void r300_surface_copy(struct pipe_context* pipe, * is really transparent. The states will be restored by the blitter once * copying is done. */ r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state); + util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); util_blitter_save_fragment_sampler_states( r300->blitter, r300->sampler_count, (void**)r300->sampler_states); @@ -123,7 +123,7 @@ void r300_surface_fill(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state); + util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); util_blitter_fill(r300->blitter, dst, dstx, dsty, width, height, value); diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 51fdb82ff34..92de297ef1d 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) /* Reasonable defaults */ caps->num_vert_fpus = 4; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? 
FALSE : TRUE; + caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4A54: caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5548: @@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D57: caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x554C: @@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D4A: caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5D4C: @@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D52: caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x4B48: @@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4B4C: caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5E4C: @@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5E4D: caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5954: @@ -212,6 +219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x791F: caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x793F: @@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x7942: caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x796C: @@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x796F: caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0633a8b8a72..28084864929 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ 
b/src/gallium/drivers/r300/r300_chipset.h @@ -40,11 +40,18 @@ struct r300_capabilities { unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Whether or not this is R400. The differences compared to their R3xx + * cousins are: + * - Extended fragment shader registers + * - Blend LTE/GTE thresholds */ + boolean is_r400; /* Whether or not this is an RV515 or newer; R500s have many differences * that require extra consideration, compared to their R3xx cousins: * - Extra bit of width and height on texture sizes * - Blend color is split across two registers - * - Universal Shader (US) block used for fragment shaders */ + * - Blend LTE/GTE thresholds + * - Universal Shader (US) block used for fragment shaders + * - FP16 blending and multisampling */ boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index af95bbe789c..14820ca8547 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -22,9 +22,6 @@ #include "draw/draw_context.h" -#include "tgsi/tgsi_scan.h" - -#include "util/u_hash_table.h" #include "util/u_memory.h" #include "util/u_simple_list.h" @@ -35,30 +32,16 @@ #include "r300_query.h" #include "r300_render.h" #include "r300_screen.h" -#include "r300_state_derived.h" #include "r300_state_invariant.h" #include "r300_texture.h" #include "r300_winsys.h" -static enum pipe_error r300_clear_hash_table(void* key, void* value, - void* data) -{ - FREE(key); - FREE(value); - return PIPE_OK; -} - static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); struct r300_query* query, * temp; util_blitter_destroy(r300->blitter); - - util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table, - NULL); - util_hash_table_destroy(r300->shader_hash_table); - 
draw_destroy(r300->draw); /* Free the OQ BO. */ @@ -72,9 +55,10 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); - FREE(r300->rs_block); + FREE(r300->fb_state.state); + FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); - FREE(r300->vertex_info); + FREE(r300->vertex_format_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300); @@ -87,7 +71,7 @@ r300_is_texture_referenced(struct pipe_context *pipe, { struct pipe_buffer* buf = 0; - r300_get_texture_buffer(texture, &buf, NULL); + r300_get_texture_buffer(pipe->screen, texture, &buf, NULL); return pipe->is_buffer_referenced(pipe, buf); } @@ -110,30 +94,48 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } -#define R300_INIT_ATOM(name) \ - r300->name##_state.state = NULL; \ - r300->name##_state.emit = r300_emit_##name##_state; \ - r300->name##_state.dirty = FALSE; \ - insert_at_tail(&r300->atom_list, &r300->name##_state); +#define R300_INIT_ATOM(atomname, atomsize) \ + r300->atomname##_state.name = #atomname; \ + r300->atomname##_state.state = NULL; \ + r300->atomname##_state.size = atomsize; \ + r300->atomname##_state.emit = r300_emit_##atomname##_state; \ + r300->atomname##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname##_state); static void r300_setup_atoms(struct r300_context* r300) { + /* Create the actual atom list. + * + * Each atom is examined and emitted in the order it appears here, which + * can affect performance and conformance if not handled with care. + * + * Some atoms never change size, others change every emit. This is just + * an upper bound on each atom, to keep the emission machinery from + * underallocating space. 
*/ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(ztop); - R300_INIT_ATOM(blend); - R300_INIT_ATOM(blend_color); - R300_INIT_ATOM(clip); - R300_INIT_ATOM(dsa); - R300_INIT_ATOM(rs); - R300_INIT_ATOM(scissor); - R300_INIT_ATOM(viewport); + R300_INIT_ATOM(invariant, 71); + R300_INIT_ATOM(ztop, 2); + R300_INIT_ATOM(blend, 8); + R300_INIT_ATOM(blend_color, 3); + R300_INIT_ATOM(clip, 29); + R300_INIT_ATOM(dsa, 8); + R300_INIT_ATOM(fb, 56); + R300_INIT_ATOM(rs, 25); + R300_INIT_ATOM(scissor, 3); + R300_INIT_ATOM(viewport, 9); + R300_INIT_ATOM(rs_block, 21); + R300_INIT_ATOM(vertex_format, 26); + + /* Some non-CSO atoms need explicit space to store the state locally. */ + r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); } struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct radeon_winsys* radeon_winsys) + void *priv) { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); + struct radeon_winsys* radeon_winsys = r300screen->radeon_winsys; if (!r300) return NULL; @@ -142,8 +144,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.winsys = (struct pipe_winsys*)radeon_winsys; r300->context.screen = screen; - - r300_init_debug(r300); + r300->context.priv = priv; r300->context.destroy = r300_destroy_context; @@ -174,16 +175,13 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; - r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, - r300_shader_key_compare); - r300_setup_atoms(r300); r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); - r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state.state = CALLOC_STRUCT(r300_scissor_state); - r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); + 
r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); + r300->vertex_format_state.state = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); @@ -200,7 +198,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_state_functions(r300); - r300_emit_invariant_state(r300); + r300->invariant_state.dirty = TRUE; r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 5937f0e2cc5..84617578128 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -28,7 +28,9 @@ #include "util/u_blitter.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" + +#include "r300_screen.h" struct r300_context; @@ -36,10 +38,20 @@ struct r300_fragment_shader; struct r300_vertex_shader; struct r300_atom { + /* List pointers. */ struct r300_atom *prev, *next; + /* Name, for debugging. */ + const char* name; + /* Opaque state. */ void* state; + /* Emit the state to the context. */ void (*emit)(struct r300_context*, void*); + /* Upper bound on number of dwords to emit. */ + unsigned size; + /* Whether this atom should be emitted. */ boolean dirty; + /* Another dirty flag that is never automatically cleared. 
*/ + boolean always_dirty; }; struct r300_blend_state { @@ -72,13 +84,14 @@ struct r300_rs_state { struct pipe_rasterizer_state rs; uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ + uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ - uint32_t depth_scale_front; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ - uint32_t depth_offset_front;/* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ - uint32_t depth_scale_back; /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ - uint32_t depth_offset_back; /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ + float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ + /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ + float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ + /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ @@ -106,16 +119,6 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_scissor_regs { - uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ -}; - -struct r300_scissor_state { - struct r300_scissor_regs framebuffer; - struct r300_scissor_regs scissor; -}; - struct r300_texture_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ @@ -136,15 +139,12 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 #define R300_ANY_NEW_SAMPLERS 0x0001fe00 #define R300_NEW_TEXTURE 
0x00040000 #define R300_ANY_NEW_TEXTURES 0x03fc0000 -#define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 #define R300_NEW_QUERY 0x40000000 @@ -188,6 +188,12 @@ struct r300_query { struct r300_query* next; }; +enum r300_buffer_tiling { + R300_BUFFER_LINEAR = 0, + R300_BUFFER_TILED, + R300_BUFFER_SQUARETILED +}; + struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -224,6 +230,9 @@ struct r300_texture { /* Registers carrying texture format data. */ struct r300_texture_state state; + + /* Buffer tiling */ + enum r300_buffer_tiling microtile, macrotile; }; struct r300_vertex_info { @@ -260,11 +269,8 @@ struct r300_context { struct r300_query *query_current; struct r300_query query_list; - /* Shader hash table. Used to store vertex formatting information, which - * depends on the combination of both currently loaded shaders. */ - struct util_hash_table* shader_hash_table; /* Vertex formatting information. */ - struct r300_vertex_info* vertex_info; + struct r300_atom vertex_format_state; /* Various CSO state objects. */ /* Beginning of atom list. */ @@ -281,12 +287,12 @@ struct r300_context { struct r300_atom dsa_state; /* Fragment shader. */ struct r300_fragment_shader* fs; - /* Framebuffer state. We currently don't need our own version of this. */ - struct pipe_framebuffer_state framebuffer_state; + /* Framebuffer state. */ + struct r300_atom fb_state; /* Rasterizer state. */ struct r300_atom rs_state; /* RS block state. */ - struct r300_rs_block* rs_block; + struct r300_atom rs_block_state; /* Sampler states. */ struct r300_sampler_state* sampler_states[8]; int sampler_count; @@ -302,6 +308,9 @@ struct r300_context { /* ZTOP state. */ struct r300_atom ztop_state; + /* Invariant state. This must be emitted to get the engine started. */ + struct r300_atom invariant_state; + /* Vertex buffers for Gallium. 
*/ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; @@ -315,9 +324,10 @@ struct r300_context { uint32_t dirty_hw; /* Whether the TCL engine should be in bypass mode. */ boolean tcl_bypass; - - /** Combination of DBG_xxx flags */ - unsigned debug; + /* Whether polygon offset is enabled. */ + boolean polygon_offset_enabled; + /* Z buffer bit depth. */ + uint32_t zbuffer_bpp; }; /* Convenience cast wrapper. */ @@ -326,40 +336,24 @@ static INLINE struct r300_context* r300_context(struct pipe_context* context) return (struct r300_context*)context; } + +struct pipe_context* r300_create_context(struct pipe_screen* screen, + void *priv); + /* Context initialization. */ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); -/* Debug functionality. */ - -/** - * Debug flags to disable/enable certain groups of debugging outputs. - * - * \note These may be rather coarse, and the grouping may be impractical. - * If you find, while debugging the driver, that a different grouping - * of these flags would be beneficial, just feel free to change them - * but make sure to update the documentation in r300_debug.c to reflect - * those changes. - */ -/*@{*/ -#define DBG_HELP 0x0000001 -#define DBG_FP 0x0000002 -#define DBG_VP 0x0000004 -#define DBG_CS 0x0000008 -#define DBG_DRAW 0x0000010 -#define DBG_TEX 0x0000020 -#define DBG_FALL 0x0000040 -/*@}*/ - -static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) +static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags) { - return (ctx->debug & flags) ? TRUE : FALSE; + return SCREEN_DBG_ON(r300_screen(ctx->context.screen), flags); } -static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) +static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags, + const char * fmt, ...) 
{ - if (DBG_ON(ctx, flags)) { + if (CTX_DBG_ON(ctx, flags)) { va_list va; va_start(va, fmt); debug_vprintf(fmt, va); @@ -367,6 +361,8 @@ static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * f } } -void r300_init_debug(struct r300_context * ctx); +#define DBG_ON CTX_DBG_ON +#define DBG CTX_DBG #endif /* R300_CONTEXT_H */ + diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d142fee0502..151f72b0fe4 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -52,7 +52,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0; + int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ assert(cs_winsys->check_cs(cs_winsys, (size))) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 2a6ed54ac9b..b881730848a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -22,8 +22,6 @@ #include "r300_context.h" -#include <ctype.h> - struct debug_option { const char * name; @@ -46,7 +44,7 @@ static struct debug_option debug_options[] = { { 0, 0, 0 } }; -void r300_init_debug(struct r300_context * ctx) +void r300_init_debug(struct r300_screen * screen) { const char * options = debug_get_option("RADEON_DEBUG", 0); boolean printhint = FALSE; @@ -64,7 +62,7 @@ void r300_init_debug(struct r300_context * ctx) for(opt = debug_options; opt->name; ++opt) { if (!strncmp(options, opt->name, length)) { - ctx->debug |= opt->flag; + screen->debug |= opt->flag; break; } } @@ -77,11 +75,11 @@ void r300_init_debug(struct r300_context * ctx) options += length; } - if (!ctx->debug) + if (!screen->debug) printhint = TRUE; } - if (printhint || ctx->debug & DBG_HELP) { + if (printhint || screen->debug & DBG_HELP) { debug_printf("You can enable debug output by setting the RADEON_DEBUG environment 
variable\n" "to a comma-separated list of debug options. Available options are:\n"); for(opt = debug_options; opt->name; ++opt) { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 0e5533c7902..ae83511a856 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -32,19 +32,20 @@ #include "r300_emit.h" #include "r300_fs.h" #include "r300_screen.h" -#include "r300_state_derived.h" #include "r300_state_inlines.h" -#include "r300_texture.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, void* state) { struct r300_blend_state* blend = (struct r300_blend_state*)state; + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); + BEGIN_CS(8); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); - if (r300->framebuffer_state.nr_cbufs) { + if (fb->nr_cbufs) { OUT_CS(blend->blend_control); OUT_CS(blend->alpha_blend_control); OUT_CS(blend->color_channel_mask); @@ -111,19 +112,15 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) { struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); BEGIN_CS(r300screen->caps->is_r500 ? 
8 : 6); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); - - /* not needed since we use the 8bit alpha ref */ - /*if (r300screen->caps->is_r500) { - OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); - }*/ - OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - if (r300->framebuffer_state.zsbuf) { + if (fb->zsbuf) { OUT_CS(dsa->z_buffer_control); OUT_CS(dsa->z_stencil_control); } else { @@ -133,7 +130,6 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) OUT_CS(dsa->stencil_ref_mask); - /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); } @@ -167,9 +163,8 @@ static const float * get_shader_constant( vec[1] = 1.0 / tex->height0; break; - /* Texture compare-fail value. */ - /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient, - * this is always (0,0,0,0), right? */ + /* Texture compare-fail value. Shouldn't ever show up, but if + * it does, we'll be ready. */ case RC_STATE_SHADOW_AMBIENT: vec[3] = 0; break; @@ -383,17 +378,14 @@ void r500_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_fb_state(struct r300_context* r300, - struct pipe_framebuffer_state* fb) +void r300_emit_fb_state(struct r300_context* r300, void* state) { + struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_texture* tex; struct pipe_surface* surf; int i; CS_LOCALS(r300); - /* Shouldn't fail unless there is a bug in the state tracker. */ - assert(fb->nr_cbufs <= 4); - BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) + (fb->zsbuf ? 10 : 0) + 6); @@ -406,7 +398,14 @@ void r300_emit_fb_state(struct r300_context* r300, R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); /* Set the number of colorbuffers. 
*/ - OUT_CS_REG(R300_RB3D_CCTL, R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs)); + if (fb->nr_cbufs > 1) { + OUT_CS_REG(R300_RB3D_CCTL, + R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs) | + R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE | + R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE); + } else { + OUT_CS_REG(R300_RB3D_CCTL, 0x0); + } /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { @@ -419,8 +418,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | - r300_translate_colorformat(tex->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); + r300_translate_colorformat(tex->tex.format) | + R300_COLOR_TILE(tex->macrotile) | + R300_COLOR_MICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(surf->format)); @@ -443,8 +444,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, - RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | + R300_DEPTHMACROTILE(tex->macrotile) | + R300_DEPTHMICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); } END_CS; @@ -579,32 +582,52 @@ void r300_emit_query_end(struct r300_context* r300) void r300_emit_rs_state(struct r300_context* r300, void* state) { struct r300_rs_state* rs = (struct r300_rs_state*)state; + float scale, offset; CS_LOCALS(r300); - BEGIN_CS(22); + BEGIN_CS(18 + (rs->polygon_offset_enable ? 
5 : 0)); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); + + OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); + OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); OUT_CS(rs->point_minmax); OUT_CS(rs->line_control); - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 6); - OUT_CS(rs->depth_scale_front); - OUT_CS(rs->depth_offset_front); - OUT_CS(rs->depth_scale_back); - OUT_CS(rs->depth_offset_back); + + if (rs->polygon_offset_enable) { + scale = rs->depth_scale * 12; + offset = rs->depth_offset; + + switch (r300->zbuffer_bpp) { + case 16: + offset *= 4; + break; + case 24: + offset *= 2; + break; + } + + OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CS_32F(scale); + OUT_CS_32F(offset); + OUT_CS_32F(scale); + OUT_CS_32F(offset); + } + + OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); OUT_CS(rs->polygon_offset_enable); OUT_CS(rs->cull_mode); OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); - OUT_CS_REG(R300_GA_COLOR_CONTROL, rs->color_control); OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode); END_CS; } -void r300_emit_rs_block_state(struct r300_context* r300, - struct r300_rs_block* rs) +void r300_emit_rs_block_state(struct r300_context* r300, void* state) { - int i; + struct r300_rs_block* rs = (struct r300_rs_block*)state; + unsigned i; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -641,27 +664,65 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -static void r300_emit_scissor_regs(struct r300_context* r300, - struct r300_scissor_regs* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + unsigned minx, miny, maxx, maxy; + uint32_t top_left, bottom_right; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; + struct pipe_framebuffer_state* fb = 
+ (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); - BEGIN_CS(3); - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->top_left); - OUT_CS(scissor->bottom_right); - END_CS; -} + minx = miny = 0; + maxx = fb->width; + maxy = fb->height; -void r300_emit_scissor_state(struct r300_context* r300, void* state) -{ - struct r300_scissor_state* scissor = (struct r300_scissor_state*)state; - /* XXX argfl! */ if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { - r300_emit_scissor_regs(r300, &scissor->scissor); + minx = MAX2(minx, scissor->minx); + miny = MAX2(miny, scissor->miny); + maxx = MIN2(maxx, scissor->maxx); + maxy = MIN2(maxy, scissor->maxy); + } + + /* Special case for zero-area scissor. + * + * We can't allow the variables maxx and maxy to be zero because they are + * subtracted from later in the code, which would cause emitting ~0 and + * making the kernel checker angry. + * + * Let's consider we change maxx and maxy to 1, which is effectively + * a one-pixel area. We must then change minx and miny to a number which is + * greater than 1 to get the zero area back. */ + if (!maxx || !maxy) { + minx = 2; + miny = 2; + maxx = 1; + maxy = 1; + } + + if (r300screen->caps->is_r500) { + top_left = + (minx << R300_SCISSORS_X_SHIFT) | + (miny << R300_SCISSORS_Y_SHIFT); + bottom_right = + ((maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((maxy - 1) << R300_SCISSORS_Y_SHIFT); } else { - r300_emit_scissor_regs(r300, &scissor->framebuffer); + /* Offset of 1440 in non-R500 chipsets. 
*/ + top_left = + ((minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((miny + 1440) << R300_SCISSORS_Y_SHIFT); + bottom_right = + (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(top_left); + OUT_CS(bottom_right); + END_CS; } void r300_emit_texture(struct r300_context* r300, @@ -680,12 +741,18 @@ void r300_emit_texture(struct r300_context* r300, filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - /* determine min/max levels */ - /* the MAX_MIP level is the largest (finest) one */ - max_level = MIN2(sampler->max_lod, tex->tex.last_level); - min_level = MIN2(sampler->min_lod, max_level); - format0 |= R300_TX_NUM_LEVELS(max_level); - filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + if (tex->is_npot) { + /* NPOT textures don't support mip filter, unfortunately. + * This prevents incorrect rendering. */ + filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; + } else { + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + format0 |= R300_TX_NUM_LEVELS(max_level); + filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + } BEGIN_CS(16); OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | @@ -697,27 +764,13 @@ void r300_emit_texture(struct r300_context* r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_RELOC(tex->buffer, + R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile), + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); END_CS; } -static boolean r300_validate_aos(struct r300_context *r300) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct 
pipe_vertex_element *velem = r300->vertex_element; - int i; - - /* Check if formats and strides are aligned to the size of DWORD. */ - for (i = 0; i < r300->vertex_element_count; i++) { - if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || - util_format_get_blocksize(velem[i].src_format) % 4 != 0) { - return FALSE; - } - } - return TRUE; -} - void r300_emit_aos(struct r300_context* r300, unsigned offset) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; @@ -727,12 +780,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); - /* XXX Move this checking to a more approriate place. */ - if (!r300_validate_aos(r300)) { - /* XXX We should fallback using Draw. */ - assert(0); - } - BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(aos_count); @@ -764,64 +811,39 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -#if 0 -void r300_emit_draw_packet(struct r300_context* r300) +void r300_emit_vertex_format_state(struct r300_context* r300, void* state) { - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " - "vertex size %d\n", r300->vbo, - r300->vertex_info->vinfo.size); - /* Set the pointer to our vertex buffer. 
The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [offset into BO] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info->vinfo.size | - (r300->vertex_info->vinfo.size << 8)); - OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; -} -#endif - -void r300_emit_vertex_format_state(struct r300_context* r300) -{ - int i; + struct r300_vertex_info* vertex_info = (struct r300_vertex_info*)state; + unsigned i; CS_LOCALS(r300); DBG(r300, DBG_DRAW, "r300: VAP/PSC emit:\n"); BEGIN_CS(26); - OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info->vinfo.size); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_info->vinfo.size); OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(r300->vertex_info->vinfo.hwfmt[0]); - OUT_CS(r300->vertex_info->vinfo.hwfmt[1]); + OUT_CS(vertex_info->vinfo.hwfmt[0]); + OUT_CS(vertex_info->vinfo.hwfmt[1]); OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(r300->vertex_info->vinfo.hwfmt[2]); - OUT_CS(r300->vertex_info->vinfo.hwfmt[3]); + OUT_CS(vertex_info->vinfo.hwfmt[2]); + OUT_CS(vertex_info->vinfo.hwfmt[3]); for (i = 0; i < 4; i++) { DBG(r300, DBG_DRAW, " : hwfmt%d: 0x%08x\n", i, - r300->vertex_info->vinfo.hwfmt[i]); + vertex_info->vinfo.hwfmt[i]); } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); for (i = 0; i < 8; i++) { - OUT_CS(r300->vertex_info->vap_prog_stream_cntl[i]); + OUT_CS(vertex_info->vap_prog_stream_cntl[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, - r300->vertex_info->vap_prog_stream_cntl[i]); + vertex_info->vap_prog_stream_cntl[i]); } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); for (i = 0; i < 8; i++) { - OUT_CS(r300->vertex_info->vap_prog_stream_cntl_ext[i]); + OUT_CS(vertex_info->vap_prog_stream_cntl_ext[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, - r300->vertex_info->vap_prog_stream_cntl_ext[i]); + 
vertex_info->vap_prog_stream_cntl_ext[i]); } END_CS; } @@ -986,30 +1008,21 @@ static void r300_flush_pvs(struct r300_context* r300) END_CS; } -/* Emit all dirty state. */ -void r300_emit_dirty_state(struct r300_context* r300) +void r300_emit_buffer_validate(struct r300_context *r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_texture* tex; - struct r300_atom* atom; - int i, dirty_tex = 0; + unsigned i; boolean invalid = FALSE; - /* Check size of CS. */ - /* Make sure we have at least 8*1024 spare dwords. */ - /* XXX It would be nice to know the number of dwords we really need to - * XXX emit. */ - if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { - r300->context.flush(&r300->context, 0, NULL); - } - /* Clean out BOs. */ r300->winsys->reset_bos(r300->winsys); validate: /* Color buffers... */ - for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) { - tex = (struct r300_texture*)r300->framebuffer_state.cbufs[i]->texture; + for (i = 0; i < fb->nr_cbufs; i++) { + tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, 0, RADEON_GEM_DOMAIN_VRAM)) { @@ -1018,8 +1031,8 @@ validate: } } /* ...depth buffer... */ - if (r300->framebuffer_state.zsbuf) { - tex = (struct r300_texture*)r300->framebuffer_state.zsbuf->texture; + if (fb->zsbuf) { + tex = (struct r300_texture*)fb->zsbuf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, 0, RADEON_GEM_DOMAIN_VRAM)) { @@ -1039,10 +1052,12 @@ validate: } } /* ...occlusion query buffer... 
*/ - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; + if (r300->dirty_state & R300_NEW_QUERY) { + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } } /* ...and vertex buffer. */ if (r300->vbo) { @@ -1064,6 +1079,31 @@ validate: invalid = TRUE; goto validate; } +} + +/* Emit all dirty state. */ +void r300_emit_dirty_state(struct r300_context* r300) +{ + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_atom* atom; + unsigned i, dwords = 1024; + int dirty_tex = 0; + + /* Check the required number of dwords against the space remaining in the + * current CS object. If we need more, then flush. */ + + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + dwords += atom->size; + } + } + + /* Make sure we have at least 2*1024 spare dwords. */ + /* XXX It would be nice to know the number of dwords we really need to + * XXX emit. */ + while (!r300->winsys->check_cs(r300->winsys, dwords)) { + r300->context.flush(&r300->context, 0, NULL); + } if (r300->dirty_state & R300_NEW_QUERY) { r300_emit_query_start(r300); @@ -1071,7 +1111,7 @@ validate: } foreach(atom, &r300->atom_list) { - if (atom->dirty) { + if (atom->dirty || atom->always_dirty) { atom->emit(r300, atom->state); atom->dirty = FALSE; } @@ -1098,16 +1138,6 @@ validate: r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS; } - if (r300->dirty_state & R300_NEW_FRAMEBUFFERS) { - r300_emit_fb_state(r300, &r300->framebuffer_state); - r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; - } - - if (r300->dirty_state & R300_NEW_RS_BLOCK) { - r300_emit_rs_block_state(r300, r300->rs_block); - r300->dirty_state &= ~R300_NEW_RS_BLOCK; - } - /* Samplers and textures are tracked separately but emitted together. 
*/ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { @@ -1133,11 +1163,6 @@ validate: r300_flush_textures(r300); } - if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) { - r300_emit_vertex_format_state(r300); - r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT; - } - if (r300->dirty_state & (R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS)) { r300_flush_pvs(r300); } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 05a6bfeae86..6b96d9b57c0 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -51,8 +51,7 @@ void r500_emit_fragment_program_code(struct r300_context* r300, void r500_emit_fs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants); -void r300_emit_fb_state(struct r300_context* r300, - struct pipe_framebuffer_state* fb); +void r300_emit_fb_state(struct r300_context* r300, void* state); void r300_emit_query_begin(struct r300_context* r300, struct r300_query* query); @@ -61,8 +60,7 @@ void r300_emit_query_end(struct r300_context* r300); void r300_emit_rs_state(struct r300_context* r300, void* state); -void r300_emit_rs_block_state(struct r300_context* r300, - struct r300_rs_block* rs); +void r300_emit_rs_block_state(struct r300_context* r300, void* state); void r300_emit_scissor_state(struct r300_context* r300, void* state); @@ -73,7 +71,7 @@ void r300_emit_texture(struct r300_context* r300, void r300_emit_vertex_buffer(struct r300_context* r300); -void r300_emit_vertex_format_state(struct r300_context* r300); +void r300_emit_vertex_format_state(struct r300_context* r300, void* state); void r300_emit_vertex_program_code(struct r300_context* r300, struct r300_vertex_program_code* code); @@ -95,4 +93,6 @@ void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. 
*/ void r300_emit_dirty_state(struct r300_context* r300); +void r300_emit_buffer_validate(struct r300_context *r300); + #endif /* R300_EMIT_H */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc4..e37d3092703 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -29,7 +29,6 @@ #include "r300_cs.h" #include "r300_emit.h" #include "r300_flush.h" -#include "r300_state_invariant.h" static void r300_flush(struct pipe_context* pipe, unsigned flags, @@ -37,8 +36,10 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_query *query; + struct r300_atom *atom; CS_LOCALS(r300); + (void) cs_count; /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -51,10 +52,17 @@ static void r300_flush(struct pipe_context* pipe, if (r300->dirty_hw) { FLUSH_CS; - r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; + + /* New kitchen sink, baby. 
*/ + foreach(atom, &r300->atom_list) { + if (atom->state) { + atom->dirty = TRUE; + } + } } + /* reset flushed query */ foreach(query, &r300->query_list) { query->flushed = TRUE; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 60ea9c171d5..75a05498eb3 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -49,12 +49,12 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: - assert(index <= ATTR_COLOR_COUNT); + assert(index < ATTR_COLOR_COUNT); fs_inputs->color[index] = i; break; case TGSI_SEMANTIC_GENERIC: - assert(index <= ATTR_GENERIC_COUNT); + assert(index < ATTR_GENERIC_COUNT); fs_inputs->generic[index] = i; break; @@ -77,17 +77,21 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, static void find_output_registers(struct r300_fragment_program_compiler * compiler, struct r300_fragment_shader * fs) { - unsigned i; + unsigned i, colorbuf_count = 0; /* Mark the outputs as not present initially */ - compiler->OutputColor = fs->info.num_outputs; + compiler->OutputColor[0] = fs->info.num_outputs; + compiler->OutputColor[1] = fs->info.num_outputs; + compiler->OutputColor[2] = fs->info.num_outputs; + compiler->OutputColor[3] = fs->info.num_outputs; compiler->OutputDepth = fs->info.num_outputs; /* Now see where they really are. */ for(i = 0; i < fs->info.num_outputs; ++i) { switch(fs->info.output_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: - compiler->OutputColor = i; + compiler->OutputColor[colorbuf_count] = i; + colorbuf_count++; break; case TGSI_SEMANTIC_POSITION: compiler->OutputDepth = i; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 034bfc15cf9..361813891fb 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R300_TX_OFFSET_5 0x4554 #define R300_TX_OFFSET_6 0x4558 #define R300_TX_OFFSET_7 0x455C - /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) -# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) +# define R300_TXO_MACRO_TILE_TILED (1 << 2) +# define R300_TXO_MACRO_TILE(x) ((x) << 2) # define R300_TXO_MICRO_TILE_LINEAR (0 << 3) -# define R300_TXO_MICRO_TILE (1 << 3) -# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE_TILED (1 << 3) +# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE(x) ((x) << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 - /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 @@ -2283,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_TILE(x) ((x) << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) # define R300_COLOR_MICROTILE_ENABLE (1 << 17) # define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_MICROTILE(x) ((x) << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) @@ -2544,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_DEPTHPITCH_MASK 0x00003FFC # define R300_DEPTHMACROTILE_DISABLE (0 << 16) # define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMACROTILE(x) ((x) << 16) # define R300_DEPTHMICROTILE_LINEAR (0 << 17) # define R300_DEPTHMICROTILE_TILED (1 << 17) # define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHMICROTILE(x) ((x) << 17) # define R300_DEPTHENDIAN_NO_SWAP (0 << 18) # define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) # define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index ee43421cdb7..cd4971ae136 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,8 +26,9 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_prim.h" @@ -114,6 +115,97 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +static boolean immd_is_good_idea(struct r300_context *r300, + unsigned count) +{ + return count <= 4; +} + +static void r300_emit_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + struct pipe_vertex_element* velem; + struct pipe_vertex_buffer* vbuf; + unsigned vertex_element_count = r300->vertex_element_count; + unsigned i, v, vbi, dw, elem_offset; + + /* Size of the vertex, in dwords. */ + unsigned vertex_size = 0; + + /* Offsets of the attribute, in dwords, from the start of the vertex. */ + unsigned offset[PIPE_MAX_ATTRIBS]; + + /* Size of the vertex element, in dwords. */ + unsigned size[PIPE_MAX_ATTRIBS]; + + /* Stride to the same attrib in the next vertex in the vertex buffer, + * in dwords. */ + unsigned stride[PIPE_MAX_ATTRIBS] = {0}; + + /* Mapped vertex buffers. 
*/ + uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; + + CS_LOCALS(r300); + + /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->vertex_element[i]; + offset[i] = velem->src_offset / 4; + size[i] = util_format_get_blocksize(velem->src_format) / 4; + vertex_size += size[i]; + vbi = velem->vertex_buffer_index; + + /* Map the buffer. */ + if (!map[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + map[vbi] = (uint32_t*)pipe_buffer_map(r300->context.screen, + vbuf->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + map[vbi] += vbuf->buffer_offset / 4; + stride[vbi] = vbuf->stride / 4; + } + } + + r300_emit_dirty_state(r300); + + BEGIN_CS(10 + count * vertex_size); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + r300_translate_primitive(mode)); + + /* Emit vertices. */ + for (v = 0; v < count; v++) { + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->vertex_element[i]; + vbi = velem->vertex_buffer_index; + elem_offset = offset[i] + stride[vbi] * (v + start); + + for (dw = 0; dw < size[i]; dw++) { + OUT_CS(map[vbi][elem_offset + dw]); + } + } + } + END_CS; + + /* Unmap buffers. 
*/ + for (i = 0; i < vertex_element_count; i++) { + vbi = r300->vertex_element[i].vertex_buffer_index; + + if (map[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + pipe_buffer_unmap(r300->context.screen, vbuf->buffer); + map[vbi] = NULL; + } + } +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -183,17 +275,18 @@ static void r300_emit_draw_elements(struct r300_context *r300, END_CS; } - static boolean r300_setup_vertex_buffers(struct r300_context *r300) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->vertex_element; + struct pipe_buffer *pbuf; validate: for (int i = 0; i < r300->vertex_element_count; i++) { - if (!r300->winsys->add_buffer(r300->winsys, - vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, 0)) { + pbuf = vbuf[velem[i].vertex_buffer_index].buffer; + + if (!r300->winsys->add_buffer(r300->winsys, pbuf, + RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -207,17 +300,49 @@ validate: return TRUE; } +static void r300_shorten_ubyte_elts(struct r300_context* r300, + struct pipe_buffer** elts, + unsigned count) +{ + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* new_elts; + unsigned char *in_map; + unsigned short *out_map; + unsigned i; + + new_elts = screen->buffer_create(screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + 2 * count); + + in_map = pipe_buffer_map(screen, *elts, PIPE_BUFFER_USAGE_CPU_READ); + out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE); + + for (i = 0; i < count; i++) { + *out_map = (unsigned short)*in_map; + in_map++; + out_map++; + } + + pipe_buffer_unmap(screen, *elts); + pipe_buffer_unmap(screen, new_elts); + + *elts = new_elts; +} + /* This is the fast-path drawing & emission for HW TCL. 
*/ void r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (!u_trim_pipe_prim(mode, &count)) { return; @@ -232,17 +357,24 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_update_derived_state(r300); + r300_emit_buffer_validate(r300); + if (!r300_setup_vertex_buffers(r300)) { return; } + if (indexSize == 1) { + r300_shorten_ubyte_elts(r300, &indexBuffer, count); + indexSize = 2; + } + if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return; + goto cleanup; } if (!r300->winsys->validate(r300->winsys)) { - return; + goto cleanup; } r300_emit_dirty_state(r300); @@ -251,6 +383,11 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); + +cleanup: + if (indexBuffer != orgIndexBuffer) { + pipe->screen->buffer_destroy(indexBuffer); + } } /* Simple helpers for context setup. Should probably be moved to util. 
*/ @@ -264,7 +401,7 @@ void r300_draw_elements(struct pipe_context* pipe, } void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) + unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); @@ -281,15 +418,19 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_update_derived_state(r300); - if (!r300_setup_vertex_buffers(r300)) { - return; - } - - r300_emit_dirty_state(r300); + r300_emit_buffer_validate(r300); - r300_emit_aos(r300, start); + if (immd_is_good_idea(r300, count)) { + r300_emit_draw_arrays_immediate(r300, mode, start, count); + } else { + if (!r300_setup_vertex_buffers(r300)) { + return; + } - r300_emit_draw_arrays(r300, mode, count); + r300_emit_dirty_state(r300); + r300_emit_aos(r300, start); + r300_emit_draw_arrays(r300, mode, count); + } } /**************************************************************************** @@ -321,6 +462,7 @@ void r300_swtcl_draw_arrays(struct pipe_context* pipe, draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX, + 0, r300->shader_constants[PIPE_SHADER_VERTEX].constants, r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); @@ -365,6 +507,7 @@ void r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX, + 0, r300->shader_constants[PIPE_SHADER_VERTEX].constants, r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); @@ -416,7 +559,7 @@ r300_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return &r300->vertex_info->vinfo; + return (struct vertex_info*)r300->vertex_format_state.state; } static boolean r300_render_allocate_vertices(struct vbuf_render* render, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 287664b1d20..da4ec542ade 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -20,7 
+20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_simple_screen.h" @@ -30,6 +30,7 @@ #include "r300_texture.h" #include "radeon_winsys.h" +#include "r300_winsys.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. @@ -113,6 +114,8 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) * ~ C. */ return 1; + case PIPE_CAP_DUAL_SOURCE_BLEND: + return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: @@ -149,6 +152,20 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) } else { return 0; } + case PIPE_CAP_INDEP_BLEND_ENABLE: + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } + case PIPE_CAP_INDEP_BLEND_FUNC: + return 0; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; default: debug_printf("r300: Implementation error: Bad param %d\n", param); @@ -183,10 +200,20 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) } } -static boolean check_tex_format(enum pipe_format format, uint32_t usage, - boolean is_r500) +static boolean r300_is_format_supported(struct pipe_screen* screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned usage, + unsigned geom_flags) { uint32_t retval = 0; + boolean is_r500 = r300_screen(screen)->caps->is_r500; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + debug_printf("r300: Implementation error: Received bogus texture " + "target %d in %s\n", target, __FUNCTION__); + return FALSE; + } switch (format) { /* Supported formats. 
*/ @@ -194,6 +221,8 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: retval = usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | @@ -208,7 +237,8 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_YCBCR: - case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_A8L8_SRGB: case PIPE_FORMAT_A8L8_UNORM: retval = usage & PIPE_TEXTURE_USAGE_SAMPLER; break; @@ -247,28 +277,13 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: - debug_printf("r300: Note: Got unsupported format: %s in %s\n", - pf_name(format), __FUNCTION__); + SCREEN_DBG(r300_screen(screen), DBG_TEX, + "r300: Note: Got unsupported format: %s in %s\n", + pf_name(format), __FUNCTION__); return FALSE; - /* XXX These don't even exist - case PIPE_FORMAT_A32R32G32B32: - case PIPE_FORMAT_A16R16G16B16: */ - /* XXX What the deuce is UV88? (r3xx accel page 14) - debug_printf("r300: Warning: Got unimplemented format: %s in %s\n", - pf_name(format), __FUNCTION__); - return FALSE; */ - - /* XXX Supported yet unimplemented r5xx formats: */ - /* XXX Again, what is UV1010 this time? (r5xx accel page 148) */ - /* XXX Even more that don't exist - case PIPE_FORMAT_A10R10G10B10_UNORM: - case PIPE_FORMAT_A2R10G10B10_UNORM: - case PIPE_FORMAT_I10_UNORM: - debug_printf( - "r300: Warning: Got unimplemented r500 format: %s in %s\n", - pf_name(format), __FUNCTION__); - return FALSE; */ + /* XXX Add all remaining gallium-supported formats, + * see util/u_format.csv. */ default: /* Unknown format... 
*/ @@ -286,30 +301,6 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, return (retval >= usage); } -static boolean r300_is_format_supported(struct pipe_screen* pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, - unsigned geom_flags) -{ - switch (target) { - case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */ - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_3D: - case PIPE_TEXTURE_CUBE: - return check_tex_format(format, tex_usage, - r300_screen(pscreen)->caps->is_r500); - - default: - debug_printf("r300: Fatal: This is not a format target: %d\n", - target); - assert(0); - break; - } - - return FALSE; -} - static struct pipe_transfer* r300_get_tex_transfer(struct pipe_screen *screen, struct pipe_texture *texture, @@ -319,6 +310,7 @@ r300_get_tex_transfer(struct pipe_screen *screen, { struct r300_texture *tex = (struct r300_texture *)texture; struct r300_transfer *trans; + struct r300_screen *rscreen = r300_screen(screen); unsigned offset; offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */ @@ -330,11 +322,8 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; - trans->transfer.stride = r300_texture_get_stride(tex, level); + trans->transfer.stride = r300_texture_get_stride(rscreen, tex, level); trans->transfer.usage = usage; - - /* XXX not sure whether it's required to set these two, - the driver doesn't use them */ trans->transfer.zslice = zslice; trans->transfer.face = face; @@ -389,16 +378,21 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); - if (!r300screen || !caps) + if (!r300screen || !caps) { + FREE(r300screen); + FREE(caps); return NULL; + } caps->pci_id = radeon_winsys->pci_id; caps->num_frag_pipes = radeon_winsys->gb_pipes; 
caps->num_z_pipes = radeon_winsys->z_pipes; + r300_init_debug(r300screen); r300_parse_chipset(caps); r300screen->caps = caps; + r300screen->radeon_winsys = radeon_winsys; r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; @@ -406,6 +400,7 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) r300screen->screen.get_param = r300_get_param; r300screen->screen.get_paramf = r300_get_paramf; r300screen->screen.is_format_supported = r300_is_format_supported; + r300screen->screen.context_create = r300_create_context; r300screen->screen.get_tex_transfer = r300_get_tex_transfer; r300screen->screen.tex_transfer_destroy = r300_tex_transfer_destroy; r300screen->screen.transfer_map = r300_transfer_map; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 2217988addd..502fbfa5a24 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -33,8 +33,13 @@ struct r300_screen { /* Parent class */ struct pipe_screen screen; + struct radeon_winsys* radeon_winsys; + /* Chipset capabilities */ struct r300_capabilities* caps; + + /** Combination of DBG_xxx flags */ + unsigned debug; }; struct r300_transfer { @@ -57,7 +62,44 @@ r300_transfer(struct pipe_transfer* transfer) return (struct r300_transfer*)transfer; } -/* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); +/* Debug functionality. */ + +/** + * Debug flags to disable/enable certain groups of debugging outputs. + * + * \note These may be rather coarse, and the grouping may be impractical. + * If you find, while debugging the driver, that a different grouping + * of these flags would be beneficial, just feel free to change them + * but make sure to update the documentation in r300_debug.c to reflect + * those changes. 
+ */ +/*@{*/ +#define DBG_HELP 0x0000001 +#define DBG_FP 0x0000002 +#define DBG_VP 0x0000004 +#define DBG_CS 0x0000008 +#define DBG_DRAW 0x0000010 +#define DBG_TEX 0x0000020 +#define DBG_FALL 0x0000040 +/*@}*/ + +static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) +{ + return (screen->debug & flags) ? TRUE : FALSE; +} + +static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags, + const char * fmt, ...) +{ + if (SCREEN_DBG_ON(screen, flags)) { + va_list va; + va_start(va, fmt); + debug_vprintf(fmt, va); + va_end(va); + } +} + +void r300_init_debug(struct r300_screen* ctx); #endif /* R300_SCREEN_H */ + diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 435e613ddf1..d07e90860c2 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -30,7 +30,6 @@ #include "tgsi/tgsi_parse.h" #include "pipe/p_config.h" -#include "pipe/internal/p_winsys_screen.h" #include "r300_context.h" #include "r300_reg.h" @@ -156,23 +155,33 @@ static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA dstA == PIPE_BLENDFACTOR_ONE); } +static unsigned bgra_cmask(unsigned mask) +{ + /* Gallium uses RGBA color ordering while R300 expects BGRA. */ + + return ((mask & PIPE_MASK_R) << 2) | + ((mask & PIPE_MASK_B) >> 2) | + (mask & (PIPE_MASK_G | PIPE_MASK_A)); +} + /* Create a new blend state based on the CSO blend state. * * This encompasses alpha blending, logic/raster ops, and blend dithering. 
*/ static void* r300_create_blend_state(struct pipe_context* pipe, const struct pipe_blend_state* state) { + struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); - if (state->blend_enable) + if (state->rt[0].blend_enable) { - unsigned eqRGB = state->rgb_func; - unsigned srcRGB = state->rgb_src_factor; - unsigned dstRGB = state->rgb_dst_factor; + unsigned eqRGB = state->rt[0].rgb_func; + unsigned srcRGB = state->rt[0].rgb_src_factor; + unsigned dstRGB = state->rt[0].rgb_dst_factor; - unsigned eqA = state->alpha_func; - unsigned srcA = state->alpha_src_factor; - unsigned dstA = state->alpha_dst_factor; + unsigned eqA = state->rt[0].alpha_func; + unsigned srcA = state->rt[0].alpha_src_factor; + unsigned dstA = state->rt[0].alpha_dst_factor; /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, * this is just the crappy D3D naming */ @@ -289,18 +298,18 @@ static void* r300_create_blend_state(struct pipe_context* pipe, (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; } - /* Color Channel Mask */ - if (state->colormask & PIPE_MASK_R) { - blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_RED_MASK0; - } - if (state->colormask & PIPE_MASK_G) { - blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0; - } - if (state->colormask & PIPE_MASK_B) { - blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0; - } - if (state->colormask & PIPE_MASK_A) { - blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0; + /* Color channel masks for all MRTs. 
*/ + blend->color_channel_mask = bgra_cmask(state->rt[0].colormask); + if (r300screen->caps->is_r500 && state->independent_blend_enable) { + if (state->rt[1].blend_enable) { + blend->color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4; + } + if (state->rt[2].blend_enable) { + blend->color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8; + } + if (state->rt[3].blend_enable) { + blend->color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12; + } } if (state->dither) { @@ -340,6 +349,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_color_state* state = (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; @@ -355,6 +365,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); + r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2; r300->blend_color_state.dirty = TRUE; } @@ -365,11 +376,14 @@ static void r300_set_clip_state(struct pipe_context* pipe, if (r300_screen(pipe->screen)->caps->has_tcl) { memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); - r300->clip_state.dirty = TRUE; + r300->clip_state.size = 29; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); + r300->clip_state.size = 2; } + + r300->clip_state.dirty = TRUE; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. 
@@ -427,7 +441,6 @@ static void* (r300_translate_stencil_op(state->stencil[1].zfail_op) << R300_S_BACK_ZFAIL_OP_SHIFT); - /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (caps->is_r500) { dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK; @@ -446,8 +459,7 @@ static void* r300_translate_alpha_function(state->alpha.func) | R300_FG_ALPHA_FUNC_ENABLE; - /* XXX figure out why emitting 10bit alpha ref causes CS to dump */ - /* always use 8bit alpha ref */ + /* We could use 10bit alpha ref but who needs that? */ dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value); if (caps->is_r500) @@ -462,8 +474,10 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); r300->dsa_state.state = state; + r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; r300->dsa_state.dirty = TRUE; } @@ -474,56 +488,52 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_scissor_regs(const struct pipe_scissor_state* state, - struct r300_scissor_regs *scissor, - boolean is_r500) -{ - if (is_r500) { - scissor->top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. 
*/ - scissor->top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } -} - static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - struct pipe_scissor_state pscissor; + uint32_t zbuffer_bpp = 0; + + r300->fb_state.size = (10 * state->nr_cbufs) + + (2 * (4 - state->nr_cbufs)) + + (state->zsbuf ? 10 : 0) + 6; + + if (state->nr_cbufs > 4) { + debug_printf("r300: Implementation error: Too many MRTs in %s, " + "refusing to bind framebuffer state!\n", __FUNCTION__); + return; + } if (r300->draw) { draw_flush(r300->draw); } - r300->framebuffer_state = *state; - - /* XXX Arg. This is silly. */ - pscissor.minx = pscissor.miny = 0; - pscissor.maxx = state->width; - pscissor.maxy = state->height; - r300_set_scissor_regs(&pscissor, &scissor->framebuffer, - r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); /* Don't rely on the order of states being set for the first time. */ - r300->dirty_state |= R300_NEW_FRAMEBUFFERS; - + /* XXX wait what */ r300->blend_state.dirty = TRUE; r300->dsa_state.dirty = TRUE; + r300->fb_state.dirty = TRUE; r300->scissor_state.dirty = TRUE; + + /* Polygon offset depends on the zbuffer bit depth. */ + if (state->zsbuf && r300->polygon_offset_enabled) { + switch (util_format_get_blocksize(state->zsbuf->texture->format)) { + case 2: + zbuffer_bpp = 16; + break; + case 4: + zbuffer_bpp = 24; + break; + } + + if (r300->zbuffer_bpp != zbuffer_bpp) { + r300->zbuffer_bpp = zbuffer_bpp; + r300->rs_state.dirty = TRUE; + } + } } /* Create fragment shader state. 
*/ @@ -559,7 +569,7 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300_pick_fragment_shader(r300); if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { - r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + r300->vertex_format_state.dirty = TRUE; } r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -629,9 +639,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; - /* XXX I think there is something wrong with the polygon mode, - * XXX re-test when r300g is in a better shape */ - /* Enable polygon mode */ if (state->fill_cw != PIPE_POLYGON_MODE_FILL || state->fill_ccw != PIPE_POLYGON_MODE_FILL) { @@ -684,10 +691,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, } if (rs->polygon_offset_enable) { - rs->depth_offset_front = rs->depth_offset_back = - fui(state->offset_units); - rs->depth_scale_front = rs->depth_scale_back = - fui(state->offset_scale); + rs->depth_offset = state->offset_units; + rs->depth_scale = state->offset_scale; } if (state->line_stipple_enable) { @@ -719,7 +724,13 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + if (rs) { + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw; + } else { + r300->tcl_bypass = FALSE; + r300->polygon_offset_enabled = FALSE; + } r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; @@ -728,7 +739,6 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->viewport_state.dirty = TRUE; /* XXX Clean these up when we move to atom emits */ - r300->dirty_state |= R300_NEW_RS_BLOCK; if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -866,11 +876,9 @@ static 
void r300_set_scissor_state(struct pipe_context* pipe, const struct pipe_scissor_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - r300_set_scissor_regs(state, &scissor->scissor, - r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->scissor_state.state, state, + sizeof(struct pipe_scissor_state)); r300->scissor_state.dirty = TRUE; } @@ -931,7 +939,23 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, draw_set_vertex_buffers(r300->draw, count, buffers); } - r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + r300->vertex_format_state.dirty = TRUE; +} + +static boolean r300_validate_aos(struct r300_context *r300) +{ + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; + int i; + + /* Check if formats and strides are aligned to the size of DWORD. */ + for (i = 0; i < r300->vertex_element_count; i++) { + if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || + util_format_get_blocksize(velem[i].src_format) % 4 != 0) { + return FALSE; + } + } + return TRUE; } static void r300_set_vertex_elements(struct pipe_context* pipe, @@ -949,6 +973,12 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, draw_flush(r300->draw); draw_set_vertex_elements(r300->draw, count, elements); } + + if (!r300_validate_aos(r300)) { + /* XXX We should fallback using draw. 
*/ + assert(0); + abort(); + } } static void* r300_create_vs_state(struct pipe_context* pipe, @@ -989,9 +1019,10 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300_vertex_shader_setup_wpos(r300); } + r300->vertex_format_state.dirty = TRUE; + r300->dirty_state |= - R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS | - R300_NEW_VERTEX_FORMAT; + R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, @@ -1017,22 +1048,22 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) static void r300_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct r300_context* r300 = r300_context(pipe); void *mapped; - if (buf == NULL || buf->buffer->size == 0 || - (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) + if (buf == NULL || buf->size == 0 || + (mapped = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) { r300->shader_constants[shader].count = 0; return; } - assert((buf->buffer->size % 4 * sizeof(float)) == 0); - memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size); - r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pipe->screen, buf->buffer); + assert((buf->size % 4 * sizeof(float)) == 0); + memcpy(r300->shader_constants[shader].constants, mapped, buf->size); + r300->shader_constants[shader].count = buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pipe->screen, buf); if (shader == PIPE_SHADER_VERTEX) r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 192846411ba..bad9e76067c 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -37,32 +37,6 
@@ /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ -struct r300_shader_key { - struct r300_vertex_shader* vs; - struct r300_fragment_shader* fs; -}; - -struct r300_shader_derived_value { - struct r300_vertex_format* vformat; - struct r300_rs_block* rs_block; -}; - -unsigned r300_shader_key_hash(void* key) { - struct r300_shader_key* shader_key = (struct r300_shader_key*)key; - unsigned vs = (intptr_t)shader_key->vs; - unsigned fs = (intptr_t)shader_key->fs; - - return (vs << 16) | (fs & 0xffff); -} - -int r300_shader_key_compare(void* key1, void* key2) { - struct r300_shader_key* shader_key1 = (struct r300_shader_key*)key1; - struct r300_shader_key* shader_key2 = (struct r300_shader_key*)key2; - - return (shader_key1->vs == shader_key2->vs) && - (shader_key1->fs == shader_key2->fs); -} - static void r300_draw_emit_attrib(struct r300_context* r300, enum attrib_emit emit, enum interp_mode interp, @@ -74,7 +48,9 @@ static void r300_draw_emit_attrib(struct r300_context* r300, output = draw_find_shader_output(r300->draw, info->output_semantic_name[index], info->output_semantic_index[index]); - draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); + draw_emit_vertex_attr( + (struct vertex_info*)r300->vertex_format_state.state, + emit, interp, output); } static void r300_draw_emit_all_attribs(struct r300_context* r300) @@ -130,7 +106,8 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) /* Update the PSC tables. */ static void r300_vertex_psc(struct r300_context* r300) { - struct r300_vertex_info *vformat = r300->vertex_info; + struct r300_vertex_info *vformat = + (struct r300_vertex_info*)r300->vertex_format_state.state; uint16_t type, swizzle; enum pipe_format format; unsigned i; @@ -182,7 +159,8 @@ static void r300_vertex_psc(struct r300_context* r300) /* Update the PSC tables for SW TCL, using Draw. 
*/ static void r300_swtcl_vertex_psc(struct r300_context* r300) { - struct r300_vertex_info *vformat = r300->vertex_info; + struct r300_vertex_info *vformat = + (struct r300_vertex_info*)r300->vertex_format_state.state; struct vertex_info* vinfo = &vformat->vinfo; uint16_t type, swizzle; enum pipe_format format; @@ -327,7 +305,7 @@ static void r300_update_rs_block(struct r300_context* r300, struct r300_shader_semantics* vs_outputs, struct r300_shader_semantics* fs_inputs) { - struct r300_rs_block* rs = r300->rs_block; + struct r300_rs_block rs = { { 0 } }; int i, col_count = 0, tex_count = 0, fp_offset = 0; void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); @@ -350,14 +328,15 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ - rX00_rs_col(rs, col_count, i, FALSE); + rX00_rs_col(&rs, col_count, i, FALSE); /* Write it to the FS input register if it's used by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { - rX00_rs_col_write(rs, col_count, fp_offset); + rX00_rs_col_write(&rs, col_count, fp_offset); fp_offset++; } col_count++; @@ -375,11 +354,11 @@ static void r300_update_rs_block(struct r300_context* r300, if (vs_outputs->generic[i] != ATTR_UNUSED) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ - rX00_rs_tex(rs, tex_count, tex_count, FALSE); + rX00_rs_tex(&rs, tex_count, tex_count, FALSE); /* Write it to the FS input register if it's used by the FS. 
*/ if (fs_inputs->generic[i] != ATTR_UNUSED) { - rX00_rs_tex_write(rs, tex_count, fp_offset); + rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; } tex_count++; @@ -396,11 +375,11 @@ static void r300_update_rs_block(struct r300_context* r300, if (vs_outputs->fog != ATTR_UNUSED) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ - rX00_rs_tex(rs, tex_count, tex_count, TRUE); + rX00_rs_tex(&rs, tex_count, tex_count, TRUE); /* Write it to the FS input register if it's used by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { - rX00_rs_tex_write(rs, tex_count, fp_offset); + rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; } tex_count++; @@ -415,8 +394,8 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize WPOS. */ /* If the FS doesn't need it, it's not written by the VS. */ if (fs_inputs->wpos != ATTR_UNUSED) { - rX00_rs_tex(rs, tex_count, tex_count, FALSE); - rX00_rs_tex_write(rs, tex_count, fp_offset); + rX00_rs_tex(&rs, tex_count, tex_count, FALSE); + rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; tex_count++; @@ -424,51 +403,33 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { - rX00_rs_col(rs, 0, 0, TRUE); + rX00_rs_col(&rs, 0, 0, TRUE); col_count++; } - rs->count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | + rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; - rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0); + rs.inst_count = MAX3(col_count - 1, tex_count - 1, 0); + + /* Now, after all that, see if we actually need to update the state. */ + if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { + memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); + r300->rs_block_state.dirty = TRUE; + } } -/* Update the vertex format. */ +/* Update the shader-dependant states. 
*/ static void r300_update_derived_shader_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_vertex_info *vformat = + (struct r300_vertex_info*)r300->vertex_format_state.state; + struct vertex_info* vinfo = &vformat->vinfo; - /* - struct r300_shader_key* key; - struct r300_shader_derived_value* value; - key = CALLOC_STRUCT(r300_shader_key); - key->vs = r300->vs; - key->fs = r300->fs; - - value = (struct r300_shader_derived_value*) - util_hash_table_get(r300->shader_hash_table, (void*)key); - if (value) { - //vformat = value->vformat; - rs_block = value->rs_block; - - FREE(key); - } else { - rs_block = CALLOC_STRUCT(r300_rs_block); - value = CALLOC_STRUCT(r300_shader_derived_value); - - r300_update_rs_block(r300, rs_block); - - //value->vformat = vformat; - value->rs_block = rs_block; - util_hash_table_set(r300->shader_hash_table, - (void*)key, (void*)value); - } */ - - /* Reset structures */ - memset(r300->rs_block, 0, sizeof(struct r300_rs_block)); - memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info)); - memcpy(r300->vertex_info->vinfo.hwfmt, r300->vs->hwfmt, sizeof(uint)*4); + /* Mmm, delicious hax */ + memset(r300->vertex_format_state.state, 0, sizeof(struct r300_vertex_info)); + memcpy(vinfo->hwfmt, r300->vs->hwfmt, sizeof(uint)*4); r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs); @@ -476,11 +437,10 @@ static void r300_update_derived_shader_state(struct r300_context* r300) r300_vertex_psc(r300); } else { r300_draw_emit_all_attribs(r300); - draw_compute_vertex_size(&r300->vertex_info->vinfo); + draw_compute_vertex_size( + (struct vertex_info*)r300->vertex_format_state.state); r300_swtcl_vertex_psc(r300); } - - r300->dirty_state |= R300_NEW_RS_BLOCK; } static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) @@ -558,8 +518,8 @@ void r300_update_derived_state(struct r300_context* r300) { /* XXX */ if (r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | 
R300_NEW_VERTEX_SHADER | - R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) { + (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER) || + r300->vertex_format_state.dirty || r300->rs_state.dirty) { r300_update_derived_shader_state(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 35be00e1b01..5df6815221f 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -81,9 +81,6 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact) return R300_BLEND_GL_CONST_COLOR; case PIPE_BLENDFACTOR_CONST_ALPHA: return R300_BLEND_GL_CONST_ALPHA; - /* XXX WTF are these? - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: */ case PIPE_BLENDFACTOR_ZERO: return R300_BLEND_GL_ZERO; case PIPE_BLENDFACTOR_INV_SRC_COLOR: @@ -98,9 +95,16 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact) return R300_BLEND_GL_ONE_MINUS_CONST_COLOR; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; - /* XXX see above + + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */ + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + debug_printf("r300: Implementation error: " + "Bad blend factor %d not supported!\n", blend_fact); + assert(0); + break; + default: debug_printf("r300: Unknown blend factor %d\n", blend_fact); assert(0); @@ -331,7 +335,10 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) { switch (format) { /* 8-bit buffers */ + case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + /* case PIPE_FORMAT_S8_UNORM: ??? 
*/ return R300_COLOR_FORMAT_I8; /* 16-bit buffers */ case PIPE_FORMAT_R5G6B5_UNORM: @@ -408,6 +415,16 @@ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) return R300_US_OUT_FMT_C4_8 | R300_C0_SEL_A | R300_C1_SEL_B | R300_C2_SEL_G | R300_C3_SEL_R; + + /* 8-bit outputs */ + case PIPE_FORMAT_A8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_A; + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_R; + /* R300_OUT_SIGN(x) */ default: debug_printf("r300: Implementation error: " "Got unsupported output format %s in %s\n", @@ -537,6 +554,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); + unsigned swizzle[4], i; assert(format); @@ -547,11 +565,26 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { return 0; } - return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | - (0xf << R300_WRITE_ENA_SHIFT)); + /* Swizzles for 8bits formats are in the reversed order, not sure why. 
*/ + if (desc->channel[0].size == 8) { + for (i = 0; i < 4; i++) { + if (desc->swizzle[i] <= 3) { + swizzle[i] = 3 - desc->swizzle[i]; + } else { + swizzle[i] = desc->swizzle[i]; + } + } + } else { + for (i = 0; i < 4; i++) { + swizzle[i] = desc->swizzle[i]; + } + } + + return ((swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT)); } #endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index f25f3ca217d..97927acf1b4 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -38,12 +38,12 @@ struct pipe_viewport_state r300_viewport_identity = { * * Note that eventually this should be empty, but it's useful for development * and general unduplication of code. */ -void r300_emit_invariant_state(struct r300_context* r300) +void r300_emit_invariant_state(struct r300_context* r300, void* state) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(16 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(14 + (caps->has_tcl ? 2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -58,8 +58,6 @@ void r300_emit_invariant_state(struct r300_context* r300) */ /* Source of fog depth */ OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); - /* AA enable */ - OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); /*** Fog (FG) ***/ OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); @@ -79,7 +77,8 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + + (caps->family >= CHIP_FAMILY_RV350 ? 
4 : 0)); if (caps->has_tcl) { /*Flushing PVS is required before the VAP_GB registers can be changed*/ @@ -115,10 +114,12 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - if (caps->is_r500) { + + if (caps->family >= CHIP_FAMILY_RV350) { OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h index 05cff0d6dfe..5d1a9636545 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ b/src/gallium/drivers/r300/r300_state_invariant.h @@ -25,6 +25,6 @@ struct r300_context; -void r300_emit_invariant_state(struct r300_context* r300); +void r300_emit_invariant_state(struct r300_context* r300, void* state); #endif /* R300_STATE_INVARIANT_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 9a96206a4dc..67bf8ce13fd 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -30,10 +30,25 @@ #include "r300_texture.h" #include "r300_screen.h" -static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) +#include "radeon_winsys.h" + +#define TILE_WIDTH 0 +#define TILE_HEIGHT 1 + +static const unsigned microblock_table[5][3][2] = { + /*linear tiled square-tiled */ + {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ + {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +}; + +static void r300_setup_texture_state(struct r300_screen* screen, 
struct r300_texture* tex) { struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; + boolean is_r500 = screen->caps->is_r500; state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); @@ -67,8 +82,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) } assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048)); - debug_printf("r300: Set texture state (%dx%d, %d levels)\n", - pt->width0, pt->height0, pt->last_level); + SCREEN_DBG(screen, DBG_TEX, "r300: Set texture state (%dx%d, %d levels)\n", + pt->width0, pt->height0, pt->last_level); } unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, @@ -92,33 +107,78 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } /** + * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile + * of the given texture. + */ +static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim) +{ + unsigned pixsize, tile_size; + + pixsize = util_format_get_blocksize(tex->tex.format); + tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] * + (tex->macrotile == R300_BUFFER_TILED ? 8 : 1); + + assert(tile_size); + return tile_size; +} + +/** * Return the stride, in bytes, of the texture images of the given texture * at the given level. */ -unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) +unsigned r300_texture_get_stride(struct r300_screen* screen, + struct r300_texture* tex, unsigned level) { + unsigned tile_width, width; + if (tex->stride_override) return tex->stride_override; + /* Check the level. 
*/ if (level > tex->tex.last_level) { - debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, - level, tex->tex.last_level); + SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", + __FUNCTION__, level, tex->tex.last_level); return 0; } - return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); + width = u_minify(tex->tex.width0, level); + + if (!util_format_is_compressed(tex->tex.format)) { + tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH); + width = align(width, tile_width); + return util_format_get_stride(tex->tex.format, width); + } else { + return align(util_format_get_stride(tex->tex.format, width), 32); + } } -static void r300_setup_miptree(struct r300_texture* tex) +static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, + unsigned level) +{ + unsigned height, tile_height; + + height = u_minify(tex->tex.height0, level); + + if (!util_format_is_compressed(tex->tex.format)) { + tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT); + height = align(height, tile_height); + } + + return util_format_get_nblocksy(tex->tex.format, height); +} + +static void r300_setup_miptree(struct r300_screen* screen, + struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, layer_size; - int i; + unsigned stride, size, layer_size, nblocksy, i; - for (i = 0; i <= base->last_level; i++) { - unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); + SCREEN_DBG(screen, DBG_TEX, "r300: Making miptree for texture, format %s\n", + pf_name(base->format)); - stride = r300_texture_get_stride(tex, i); + for (i = 0; i <= base->last_level; i++) { + stride = r300_texture_get_stride(screen, tex, i); + nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) @@ -131,10 +191,10 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->layer_size[i] = layer_size; tex->pitch[i] = 
stride / util_format_get_blocksize(base->format); - debug_printf("r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes)\n", + SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes) %d bytes total\n", i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride); + u_minify(base->depth0, i), stride, tex->size); } } @@ -150,6 +210,8 @@ static struct pipe_texture* const struct pipe_texture* template) { struct r300_texture* tex = CALLOC_STRUCT(r300_texture); + struct r300_screen* rscreen = r300_screen(screen); + struct radeon_winsys* winsys = (struct radeon_winsys*)screen->winsys; if (!tex) { return NULL; @@ -160,12 +222,16 @@ static struct pipe_texture* tex->tex.screen = screen; r300_setup_flags(tex); - r300_setup_miptree(tex); - r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); + r300_setup_miptree(rscreen, tex); + r300_setup_texture_state(rscreen, tex); - tex->buffer = screen->buffer_create(screen, 1024, + tex->buffer = screen->buffer_create(screen, 2048, PIPE_BUFFER_USAGE_PIXEL, tex->size); + winsys->buffer_set_tiling(winsys, tex->buffer, + tex->pitch[0], + tex->microtile != R300_BUFFER_LINEAR, + tex->macrotile != R300_BUFFER_LINEAR); if (!tex->buffer) { FREE(tex); @@ -227,6 +293,7 @@ static struct pipe_texture* struct pipe_buffer* buffer) { struct r300_texture* tex; + struct r300_screen* rscreen = r300_screen(screen); /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || @@ -248,7 +315,7 @@ static struct pipe_texture* tex->pitch[0] = *stride / util_format_get_blocksize(base->format); r300_setup_flags(tex); - r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); + r300_setup_texture_state(rscreen, tex); pipe_buffer_reference(&tex->buffer, buffer); @@ -315,7 +382,8 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen) screen->video_surface_destroy= r300_video_surface_destroy; } -boolean 
r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride) { @@ -327,7 +395,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture, pipe_buffer_reference(buffer, tex->buffer); if (stride) { - *stride = r300_texture_get_stride(tex, 0); + *stride = r300_texture_get_stride(r300_screen(screen), tex, 0); } return TRUE; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 55ceb1a5136..961bdcc5b34 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -31,31 +31,46 @@ struct r300_texture; void r300_init_screen_texture_functions(struct pipe_screen* screen); -unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); +unsigned r300_texture_get_stride(struct r300_screen* screen, + struct r300_texture* tex, unsigned level); unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, unsigned zslice, unsigned face); -/* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ +/* Translate a pipe_format into a useful texture format for sampling. + * + * R300_EASY_TX_FORMAT swizzles the texture. + * Note the signature of R300_EASY_TX_FORMAT: + * R300_EASY_TX_FORMAT(B, G, R, A, FORMAT); + * + * The FORMAT specifies how the texture sampler will treat the texture, and + * makes available X, Y, Z, W, ZERO, and ONE for swizzling. 
*/ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { switch (format) { /* X8 */ + case PIPE_FORMAT_A8_UNORM: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8); case PIPE_FORMAT_I8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X8); case PIPE_FORMAT_L8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); + case PIPE_FORMAT_L8_SRGB: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | + R300_TX_FORMAT_GAMMA; /* X16 */ case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_Z16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); case PIPE_FORMAT_R16_SNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16) | R300_TX_FORMAT_SIGNED; - case PIPE_FORMAT_Z16_UNORM: - return R300_EASY_TX_FORMAT(X, X, X, X, X16); /* Y8X8 */ case PIPE_FORMAT_A8L8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); + case PIPE_FORMAT_A8L8_SRGB: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | + R300_TX_FORMAT_GAMMA; /* W8Z8Y8X8 */ case PIPE_FORMAT_A8R8G8B8_UNORM: return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); @@ -115,7 +130,8 @@ r300_video_surface(struct pipe_video_surface *pvs) #ifndef R300_WINSYS_H -boolean r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index a792c2cf989..941ec17016b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -201,6 +201,8 @@ static void transform_srcreg( struct rc_src_register * dst, struct tgsi_full_src_register * src) { + unsigned i, j; + dst->File = translate_register_file(src->Register.File); dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); dst->RelAddr = src->Register.Indirect; @@ -210,6 +212,21 @@ static void transform_srcreg( dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; dst->Abs = 
src->Register.Absolute; dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0; + + if (src->Register.File == TGSI_FILE_IMMEDIATE) { + for (i = 0; i < ttr->imms_to_swizzle_count; i++) { + if (ttr->imms_to_swizzle[i].index == src->Register.Index) { + dst->File = RC_FILE_TEMPORARY; + dst->Index = 0; + dst->Swizzle = 0; + for (j = 0; j < 4; j++) { + dst->Swizzle |= GET_SWZ(ttr->imms_to_swizzle[i].swizzle, + tgsi_util_get_full_src_register_swizzle(src, j)) << (j * 3); + } + break; + } + } + } } static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src, @@ -277,21 +294,45 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst &ttr->compiler->Program.ShadowSamplers); } -static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) +static void handle_immediate(struct tgsi_to_rc * ttr, + struct tgsi_full_immediate * imm, + unsigned index) { struct rc_constant constant; - int i; + unsigned swizzle = 0; + boolean can_swizzle = TRUE; + unsigned i; - constant.Type = RC_CONSTANT_IMMEDIATE; - constant.Size = 4; - for(i = 0; i < 4; ++i) - constant.u.Immediate[i] = imm->u[i].Float; - rc_constants_add(&ttr->compiler->Program.Constants, &constant); + for (i = 0; i < 4; i++) { + if (imm->u[i].Float == 0.0f) { + swizzle |= RC_SWIZZLE_ZERO << (i * 3); + } else if (imm->u[i].Float == 0.5f) { + swizzle |= RC_SWIZZLE_HALF << (i * 3); + } else if (imm->u[i].Float == 1.0f) { + swizzle |= RC_SWIZZLE_ONE << (i * 3); + } else { + can_swizzle = FALSE; + break; + } + } + + if (can_swizzle) { + ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].index = index; + ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].swizzle = swizzle; + ttr->imms_to_swizzle_count++; + } else { + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + for(i = 0; i < 4; ++i) + constant.u.Immediate[i] = imm->u[i].Float; + rc_constants_add(&ttr->compiler->Program.Constants, &constant); + } } void r300_tgsi_to_rc(struct tgsi_to_rc 
* ttr, const struct tgsi_token * tokens) { struct tgsi_parse_context parser; + unsigned imm_index = 0; int i; /* Allocate constants placeholders. @@ -308,6 +349,9 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) ttr->immediate_offset = ttr->compiler->Program.Constants.Count; + ttr->imms_to_swizzle = malloc(ttr->info->immediate_count * sizeof(struct swizzled_imms)); + ttr->imms_to_swizzle_count = 0; + tgsi_parse_init(&parser, tokens); while (!tgsi_parse_end_of_tokens(&parser)) { @@ -317,7 +361,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) case TGSI_TOKEN_TYPE_DECLARATION: break; case TGSI_TOKEN_TYPE_IMMEDIATE: - handle_immediate(ttr, &parser.FullToken.FullImmediate); + handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index); + imm_index++; break; case TGSI_TOKEN_TYPE_INSTRUCTION: transform_instruction(ttr, &parser.FullToken.FullInstruction); @@ -327,6 +372,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) tgsi_parse_free(&parser); + free(ttr->imms_to_swizzle); + rc_calculate_inputs_outputs(ttr->compiler); } diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h index 93e90ec6d2c..39b473c7bf5 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -29,11 +29,18 @@ struct tgsi_full_declaration; struct tgsi_shader_info; struct tgsi_token; +struct swizzled_imms { + unsigned index; + unsigned swizzle; +}; + struct tgsi_to_rc { struct radeon_compiler * compiler; const struct tgsi_shader_info * info; int immediate_offset; + struct swizzled_imms * imms_to_swizzle; + unsigned imms_to_swizzle_count; }; void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 68aef70872e..fb81b2439b6 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ 
b/src/gallium/drivers/r300/r300_vs.c @@ -61,17 +61,17 @@ static void r300_shader_read_vs_outputs( break; case TGSI_SEMANTIC_COLOR: - assert(index <= ATTR_COLOR_COUNT); + assert(index < ATTR_COLOR_COUNT); vs_outputs->color[index] = i; break; case TGSI_SEMANTIC_BCOLOR: - assert(index <= ATTR_COLOR_COUNT); + assert(index < ATTR_COLOR_COUNT); vs_outputs->bcolor[index] = i; break; case TGSI_SEMANTIC_GENERIC: - assert(index <= ATTR_GENERIC_COUNT); + assert(index < ATTR_GENERIC_COUNT); vs_outputs->generic[index] = i; break; @@ -124,7 +124,8 @@ static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { hwfmt[1] |= R300_INPUT_CNTL_COLOR; hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } @@ -182,7 +183,8 @@ static void r300_stream_locations_notcl( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { stream_loc[tabi++] = 2 + i; } } @@ -259,7 +261,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (outputs->color[i] != ATTR_UNUSED) { c->code->outputs[outputs->color[i]] = reg++; - } else if (any_bcolor_used) { + } else if (any_bcolor_used || + outputs->color[1] != ATTR_UNUSED) { reg++; } } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 1ae6de70fee..f4a8ae120c8 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -29,18 +29,20 @@ extern "C" { /* The public interface header for the r300 pipe driver. 
* Any winsys hosting this pipe needs to implement r300_winsys and then - * call r300_create_context to start things. */ + * call r300_create_screen to start things. */ #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "radeon_winsys.h" -struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct radeon_winsys* radeon_winsys); +/* Creates a new r300 screen. */ +struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); -boolean r300_get_texture_buffer(struct pipe_texture* texture, + +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index bcb887a0b26..e4ac49fa85f 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -32,6 +32,7 @@ C_SOURCES = \ sp_tex_tile_cache.c \ sp_tile_cache.c \ sp_surface.c \ - sp_video_context.c + sp_video_context.c \ + sp_winsys.c include ../../Makefile.template diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index aac9edf44e6..3042e556c64 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -34,6 +34,7 @@ softpipe = env.ConvenienceLibrary( 'sp_texture.c', 'sp_tile_cache.c', 'sp_video_context.c', + 'sp_winsys.c' ]) Export('softpipe') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index f3ac6760db5..2b22ce256e6 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -35,6 +35,7 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_inlines.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" @@ -43,8 +44,6 @@ #include "sp_surface.h" 
#include "sp_tile_cache.h" #include "sp_tex_tile_cache.h" -#include "sp_texture.h" -#include "sp_winsys.h" #include "sp_query.h" @@ -112,9 +111,13 @@ softpipe_destroy( struct pipe_context *pipe ) pipe_texture_reference(&softpipe->vertex_textures[i], NULL); } - for (i = 0; i < Elements(softpipe->constants); i++) { - if (softpipe->constants[i].buffer) { - pipe_buffer_reference(&softpipe->constants[i].buffer, NULL); + for (i = 0; i < PIPE_SHADER_TYPES; i++) { + uint j; + + for (j = 0; j < PIPE_MAX_CONSTANT_BUFFERS; j++) { + if (softpipe->constants[i][j]) { + pipe_buffer_reference(&softpipe->constants[i][j], NULL); + } } } @@ -190,7 +193,8 @@ softpipe_render_condition( struct pipe_context *pipe, struct pipe_context * -softpipe_create( struct pipe_screen *screen ) +softpipe_create_context( struct pipe_screen *screen, + void *priv ) { struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); uint i; @@ -209,6 +213,7 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.winsys = screen->winsys; softpipe->pipe.screen = screen; softpipe->pipe.destroy = softpipe_destroy; + softpipe->pipe.priv = priv; /* state setters */ softpipe->pipe.create_blend_state = softpipe_create_blend_state; @@ -256,6 +261,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; + softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; + softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 73fa744f9d4..62f9e7aad3d 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -63,7 +63,7 @@ struct softpipe_context { /** Other rendering state */ struct 
pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_buffer *constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; @@ -92,7 +92,7 @@ struct softpipe_context { ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; /** Mapped constant buffers */ - void *mapped_constants[PIPE_SHADER_TYPES]; + void *mapped_constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; /** Vertex format */ struct vertex_info vertex_info; @@ -166,5 +166,8 @@ softpipe_context( struct pipe_context *pipe ) void softpipe_reset_sampler_varients(struct softpipe_context *softpipe); +struct pipe_context * +softpipe_create_context( struct pipe_screen *, void *priv ); + #endif /* SP_CONTEXT_H */ diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 03d35fb3cb5..b2acc36bf7a 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -33,8 +33,8 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_simple_screen.h" +#include "util/u_inlines.h" #include "util/u_prim.h" #include "sp_context.h" @@ -49,30 +49,36 @@ static void softpipe_map_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; - uint i, vssize, gssize; + uint i; for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + uint j; + + for (j = 0; j < PIPE_MAX_CONSTANT_BUFFERS; j++) { + if (sp->constants[i][j] && sp->constants[i][j]->size) { + sp->mapped_constants[i][j] = ws->buffer_map(ws, + sp->constants[i][j], + PIPE_BUFFER_USAGE_CPU_READ); + } + } } - if 
(sp->constants[PIPE_SHADER_VERTEX].buffer) - vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size; - else - vssize = 0; - - if (sp->constants[PIPE_SHADER_GEOMETRY].buffer) - gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size; - else - gssize = 0; - - draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, - sp->mapped_constants[PIPE_SHADER_VERTEX], - vssize); - draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, - sp->mapped_constants[PIPE_SHADER_GEOMETRY], - gssize); + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + if (sp->constants[PIPE_SHADER_VERTEX][i]) { + draw_set_mapped_constant_buffer(sp->draw, + PIPE_SHADER_VERTEX, + i, + sp->mapped_constants[PIPE_SHADER_VERTEX][i], + sp->constants[PIPE_SHADER_VERTEX][i]->size); + } + if (sp->constants[PIPE_SHADER_GEOMETRY][i]) { + draw_set_mapped_constant_buffer(sp->draw, + PIPE_SHADER_GEOMETRY, + i, + sp->mapped_constants[PIPE_SHADER_GEOMETRY][i], + sp->constants[PIPE_SHADER_GEOMETRY][i]->size); + } + } } @@ -87,30 +93,67 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) */ draw_flush(sp->draw); - draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0); - draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0); + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + draw_set_mapped_constant_buffer(sp->draw, + PIPE_SHADER_VERTEX, + i, + NULL, + 0); + draw_set_mapped_constant_buffer(sp->draw, + PIPE_SHADER_GEOMETRY, + i, + NULL, + 0); + } for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->buffer_unmap(ws, sp->constants[i].buffer); - sp->mapped_constants[i] = NULL; + uint j; + + for (j = 0; j < PIPE_MAX_CONSTANT_BUFFERS; j++) { + if (sp->constants[i][j] && sp->constants[i][j]->size) { + ws->buffer_unmap(ws, sp->constants[i][j]); + } + sp->mapped_constants[i][j] = NULL; + } } } +/** + * Draw vertex arrays, with optional indexing. 
+ * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + */ +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - softpipe_draw_elements(pipe, NULL, 0, mode, start, count); + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - */ void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, @@ -119,6 +162,91 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + min_index, + max_index, + mode, + start, + count, + 0, + 1); +} + + +void +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); +} + +void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + 
unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; unsigned i; @@ -128,45 +256,48 @@ softpipe_draw_range_elements(struct pipe_context *pipe, sp->reduced_api_prim = u_reduced_prim(mode); - if (sp->dirty) - softpipe_update_derived( sp ); + if (sp->dirty) { + softpipe_update_derived(sp); + } softpipe_map_transfers(sp); softpipe_map_constant_buffers(sp); - /* - * Map vertex buffers - */ + /* Map vertex buffers */ for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf - = pipe_buffer_map(pipe->screen, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + void *buf; + + buf = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(draw, indexSize, - min_index, - max_index, + void *mapped_indexes; + + mapped_indexes = pipe_buffer_map(pipe->screen, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, + indexSize, + minIndex, + maxIndex, mapped_indexes); - } - else { + } else { /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, start, - start + count - 1, NULL); + draw_set_mapped_element_buffer_range(draw, + 0, + start, + start + count - 1, + 
NULL); } /* draw! */ - draw_arrays(draw, mode, start, count); + draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); - /* - * unmap vertex/index buffers - will cause draw module to flush - */ + /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); @@ -176,22 +307,8 @@ softpipe_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); } - /* Note: leave drawing surfaces mapped */ softpipe_unmap_constant_buffers(sp); sp->dirty_render_cache = TRUE; } - - -void -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - softpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); -} diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 75dac810a12..e8952bf4fb8 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -34,11 +34,9 @@ #include "draw/draw_context.h" #include "sp_flush.h" #include "sp_context.h" -#include "sp_surface.h" #include "sp_state.h" #include "sp_tile_cache.h" #include "sp_tex_tile_cache.h" -#include "sp_winsys.h" void diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index f9129506585..acee2136706 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -135,7 +135,7 @@ fs_sse_run( const struct sp_fragment_shader *base, tgsi_set_exec_mask(machine, 1, 1, 1, 1); shader->func( machine, - machine->Consts, + (const float (*)[4])machine->Consts[0], (const float (*)[4])shader->immediates, machine->InterpCoefs /*, &machine->QuadPos*/ diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c 
b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 7f573aef3c3..98c08eaffaf 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -526,6 +526,8 @@ static void sp_vbuf_destroy(struct vbuf_render *vbr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + if(cvbr->vertex_buffer) + align_free(cvbr->vertex_buffer); sp_setup_destroy_context(cvbr->setup); FREE(cvbr); } @@ -541,7 +543,6 @@ sp_create_vbuf_backend(struct softpipe_context *sp) assert(sp->draw); - cvbr->base.max_indices = SP_MAX_VBUF_INDEXES; cvbr->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index d9babe81dad..d65307b7f60 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -35,7 +35,6 @@ #include "util/u_memory.h" #include "sp_context.h" #include "sp_quad.h" -#include "sp_surface.h" #include "sp_tile_cache.h" #include "sp_quad_pipe.h" @@ -224,7 +223,8 @@ logicop_quad(struct quad_stage *qs, static void blend_quad(struct quad_stage *qs, float (*quadColor)[4], - float (*dest)[4]) + float (*dest)[4], + unsigned cbuf) { static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; @@ -234,7 +234,7 @@ blend_quad(struct quad_stage *qs, /* * Compute src/first term RGB */ - switch (softpipe->blend->rgb_src_factor) { + switch (softpipe->blend->rt[cbuf].rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: VEC4_COPY(source[0], quadColor[0]); /* R */ VEC4_COPY(source[1], quadColor[1]); /* G */ @@ -384,7 +384,7 @@ blend_quad(struct quad_stage *qs, /* * Compute src/first term A */ - switch (softpipe->blend->alpha_src_factor) { + switch (softpipe->blend->rt[cbuf].alpha_src_factor) { case PIPE_BLENDFACTOR_ONE: VEC4_COPY(source[3], quadColor[3]); /* A */ break; @@ -453,7 +453,7 @@ blend_quad(struct quad_stage *qs, /* * Compute dest/second term RGB */ - switch 
(softpipe->blend->rgb_dst_factor) { + switch (softpipe->blend->rt[cbuf].rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: /* dest = dest * 1 NO-OP, leave dest as-is */ break; @@ -593,7 +593,7 @@ blend_quad(struct quad_stage *qs, /* * Compute dest/second term A */ - switch (softpipe->blend->alpha_dst_factor) { + switch (softpipe->blend->rt[cbuf].alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: /* dest = dest * 1 NO-OP, leave dest as-is */ break; @@ -656,7 +656,7 @@ blend_quad(struct quad_stage *qs, /* * Combine RGB terms */ - switch (softpipe->blend->rgb_func) { + switch (softpipe->blend->rt[cbuf].rgb_func) { case PIPE_BLEND_ADD: VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ @@ -689,7 +689,7 @@ blend_quad(struct quad_stage *qs, /* * Combine A terms */ - switch (softpipe->blend->alpha_func) { + switch (softpipe->blend->rt[cbuf].alpha_func) { case PIPE_BLEND_ADD: VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ break; @@ -711,26 +711,24 @@ blend_quad(struct quad_stage *qs, } static void -colormask_quad(struct quad_stage *qs, +colormask_quad(unsigned colormask, float (*quadColor)[4], float (*dest)[4]) { - struct softpipe_context *softpipe = qs->softpipe; - /* R */ - if (!(softpipe->blend->colormask & PIPE_MASK_R)) + if (!(colormask & PIPE_MASK_R)) COPY_4V(quadColor[0], dest[0]); /* G */ - if (!(softpipe->blend->colormask & PIPE_MASK_G)) + if (!(colormask & PIPE_MASK_G)) COPY_4V(quadColor[1], dest[1]); /* B */ - if (!(softpipe->blend->colormask & PIPE_MASK_B)) + if (!(colormask & PIPE_MASK_B)) COPY_4V(quadColor[2], dest[2]); /* A */ - if (!(softpipe->blend->colormask & PIPE_MASK_A)) + if (!(colormask & PIPE_MASK_A)) COPY_4V(quadColor[3], dest[3]); } @@ -773,12 +771,12 @@ blend_fallback(struct quad_stage *qs, if (blend->logicop_enable) { logicop_quad( qs, quadColor, dest ); } - else if (blend->blend_enable) { - blend_quad( qs, quadColor, dest ); + else if (blend->rt[cbuf].blend_enable) { + blend_quad( qs, 
quadColor, dest, cbuf ); } - if (blend->colormask != 0xf) - colormask_quad( qs, quadColor, dest ); + if (blend->rt[cbuf].colormask != 0xf) + colormask_quad( blend->rt[cbuf].colormask, quadColor, dest); /* Output color values */ @@ -954,23 +952,23 @@ choose_blend_quad(struct quad_stage *qs, qs->run = blend_noop; } else if (!softpipe->blend->logicop_enable && - softpipe->blend->colormask == 0xf && + softpipe->blend->rt[0].colormask == 0xf && softpipe->framebuffer.nr_cbufs == 1) { - if (!blend->blend_enable) { + if (!blend->rt[0].blend_enable) { qs->run = single_output_color; } - else if (blend->rgb_src_factor == blend->alpha_src_factor && - blend->rgb_dst_factor == blend->alpha_dst_factor && - blend->rgb_func == blend->alpha_func) + else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor && + blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor && + blend->rt[0].rgb_func == blend->rt[0].alpha_func) { - if (blend->alpha_func == PIPE_BLEND_ADD) { - if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE && - blend->rgb_dst_factor == PIPE_BLENDFACTOR_ONE) { + if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) { + if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE && + blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) { qs->run = blend_single_add_one_one; } - else if (blend->rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA && - blend->rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA) + else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA && + blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA) qs->run = blend_single_add_src_alpha_inv_src_alpha; } diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 0ca86c4e1cb..a981775cbd3 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -30,11 +30,11 @@ */ #include "pipe/p_defines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include 
"tgsi/tgsi_scan.h" #include "sp_context.h" #include "sp_quad.h" -#include "sp_surface.h" #include "sp_quad_pipe.h" #include "sp_tile_cache.h" #include "sp_state.h" /* for sp_fragment_shader */ @@ -651,6 +651,20 @@ static unsigned mask_count[16] = +/** helper to get number of Z buffer bits */ +static unsigned +get_depth_bits(struct quad_stage *qs) +{ + struct pipe_surface *zsurf = qs->softpipe->framebuffer.zsbuf; + if (zsurf) + return util_format_get_component_bits(zsurf->format, + UTIL_FORMAT_COLORSPACE_ZS, 0); + else + return 0; +} + + + static void depth_test_quads_fallback(struct quad_stage *qs, struct quad_header *quads[], @@ -666,7 +680,7 @@ depth_test_quads_fallback(struct quad_stage *qs, nr = alpha_test_quads(qs, quads, nr); } - if (qs->softpipe->framebuffer.zsbuf && + if (get_depth_bits(qs) > 0 && (qs->softpipe->depth_stencil->depth.enabled || qs->softpipe->depth_stencil->stencil[0].enabled)) { @@ -884,7 +898,7 @@ choose_depth_test(struct quad_stage *qs, boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; - boolean depth = (qs->softpipe->framebuffer.zsbuf && + boolean depth = (get_depth_bits(qs) > 0 && qs->softpipe->depth_stencil->depth.enabled); unsigned depthfunc = qs->softpipe->depth_stencil->depth.func; @@ -895,7 +909,6 @@ choose_depth_test(struct quad_stage *qs, boolean occlusion = qs->softpipe->active_query_count; - if (!alpha && !depth && !stencil) { diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 1e7533d0f9e..ad04dc2afc3 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -45,8 +45,6 @@ #include "sp_state.h" #include "sp_quad.h" #include "sp_quad_pipe.h" -#include "sp_texture.h" -#include "sp_tex_sample.h" struct quad_shade_stage @@ -109,10 +107,11 @@ shade_quads(struct quad_stage *qs, struct quad_shade_stage *qss = quad_shade_stage( qs ); struct softpipe_context *softpipe = qs->softpipe; struct tgsi_exec_machine *machine = 
qss->machine; - unsigned i, pass = 0; - - machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->Consts[i] = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT][i]; + } machine->InterpCoefs = quads[0]->coef; for (i = 0; i < nr; i++) { diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index bd3532de4f4..87415f43404 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -28,13 +28,14 @@ #include "util/u_memory.h" #include "util/u_simple_screen.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" #include "sp_texture.h" #include "sp_winsys.h" #include "sp_screen.h" +#include "sp_context.h" static const char * @@ -91,6 +92,19 @@ softpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; + case PIPE_CAP_MAX_CONST_BUFFERS: + return PIPE_MAX_CONSTANT_BUFFERS; + case PIPE_CAP_MAX_CONST_BUFFER_SIZE: + return 4096 * 4 * sizeof(float); + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 1; + case PIPE_CAP_INDEP_BLEND_FUNC: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 1; default: return 0; } @@ -191,6 +205,7 @@ softpipe_create_screen(struct pipe_winsys *winsys) screen->base.get_param = softpipe_get_param; screen->base.get_paramf = softpipe_get_paramf; screen->base.is_format_supported = softpipe_is_format_supported; + screen->base.context_create = softpipe_create_context; softpipe_init_screen_texture_funcs(&screen->base); u_simple_screen_init(&screen->base); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 3da75364c5d..b8590a8cc2c 100644 --- 
a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -38,10 +38,8 @@ #include "sp_setup.h" #include "sp_state.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -395,6 +393,52 @@ static boolean setup_sort_vertices( struct setup_context *setup, } +/* Apply cylindrical wrapping to v0, v1, v2 coordinates, if enabled. + * Input coordinates must be in [0, 1] range, otherwise results are undefined. + * Some combinations of coordinates produce invalid results, + * but this behaviour is acceptable. + */ +static void +tri_apply_cylindrical_wrap(float v0, + float v1, + float v2, + uint cylindrical_wrap, + float output[3]) +{ + if (cylindrical_wrap) { + float delta; + + delta = v1 - v0; + if (delta > 0.5f) { + v0 += 1.0f; + } + else if (delta < -0.5f) { + v1 += 1.0f; + } + + delta = v2 - v1; + if (delta > 0.5f) { + v1 += 1.0f; + } + else if (delta < -0.5f) { + v2 += 1.0f; + } + + delta = v0 - v2; + if (delta > 0.5f) { + v2 += 1.0f; + } + else if (delta < -0.5f) { + v0 += 1.0f; + } + } + + output[0] = v0; + output[1] = v1; + output[2] = v2; +} + + /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). * The value value comes from vertex[slot][i]. @@ -420,13 +464,16 @@ static void const_coeff( struct setup_context *setup, /** * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. + * v[0], v[1] and v[2] are vmin, vmid and vmax, respectively. 
*/ -static void tri_linear_coeff( struct setup_context *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) +static void +tri_linear_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint i, + const float v[3]) { - float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; - float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + float botda = v[1] - v[0]; + float majda = v[2] - v[0]; float a = setup->ebot.dy * majda - botda * setup->emaj.dy; float b = setup->emaj.dx * botda - majda * setup->ebot.dx; float dadx = a * setup->oneoverarea; @@ -449,7 +496,7 @@ static void tri_linear_coeff( struct setup_context *setup, * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. */ - coef->a0[i] = (setup->vmin[vertSlot][i] - + coef->a0[i] = (v[0] - (dadx * (setup->vmin[0][0] - setup->pixel_offset) + dady * (setup->vmin[0][1] - setup->pixel_offset))); @@ -470,16 +517,19 @@ static void tri_linear_coeff( struct setup_context *setup, * the plane coefficients (a0, dadx, dady). * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. + * v[0], v[1] and v[2] are vmin, vmid and vmax, respectively. 
*/ -static void tri_persp_coeff( struct setup_context *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) +static void +tri_persp_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint i, + const float v[3]) { /* premultiply by 1/w (v[0][3] is always W): */ - float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; - float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + float mina = v[0] * setup->vmin[0][3]; + float mida = v[1] * setup->vmid[0][3]; + float maxa = v[2] * setup->vmax[0][3]; float botda = mida - mina; float majda = maxa - mina; float a = setup->ebot.dy * majda - botda * setup->emaj.dy; @@ -506,21 +556,24 @@ static void tri_persp_coeff( struct setup_context *setup, /** * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. + * X and Y are trivial, though Y may have to be inverted for OpenGL. * Z and W are copied from posCoef which should have already been computed. * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void setup_fragcoord_coeff(struct setup_context *setup, uint slot) { + struct sp_fragment_shader* spfs = setup->softpipe->fs; /*X*/ - setup->coef[slot].a0[0] = 0; + setup->coef[slot].a0[0] = spfs->pixel_center_integer ? 0.0 : 0.5; setup->coef[slot].dadx[0] = 1.0; setup->coef[slot].dady[0] = 0.0; /*Y*/ - setup->coef[slot].a0[1] = 0.0; + setup->coef[slot].a0[1] = + (spfs->origin_lower_left ? setup->softpipe->framebuffer.height : 0) + + (spfs->pixel_center_integer ? 0.0 : 0.5); setup->coef[slot].dadx[1] = 0.0; - setup->coef[slot].dady[1] = 1.0; + setup->coef[slot].dady[1] = spfs->origin_lower_left ? 
-1.0 : 1.0; /*Z*/ setup->coef[slot].a0[2] = setup->posCoef.a0[2]; setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; @@ -543,11 +596,19 @@ static void setup_tri_coefficients( struct setup_context *setup ) const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; + float v[3]; /* z and w are done by linear interpolation: */ - tri_linear_coeff(setup, &setup->posCoef, 0, 2); - tri_linear_coeff(setup, &setup->posCoef, 0, 3); + v[0] = setup->vmin[0][2]; + v[1] = setup->vmid[0][2]; + v[2] = setup->vmax[0][2]; + tri_linear_coeff(setup, &setup->posCoef, 2, v); + + v[0] = setup->vmin[0][3]; + v[1] = setup->vmid[0][3]; + v[2] = setup->vmax[0][3]; + tri_linear_coeff(setup, &setup->posCoef, 3, v); /* setup interpolation for all the remaining attributes: */ @@ -561,12 +622,24 @@ static void setup_tri_coefficients( struct setup_context *setup ) const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); break; case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + for (j = 0; j < NUM_CHANNELS; j++) { + tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], + setup->vmid[vertSlot][j], + setup->vmax[vertSlot][j], + spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + v); + tri_linear_coeff(setup, &setup->coef[fragSlot], j, v); + } break; case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + for (j = 0; j < NUM_CHANNELS; j++) { + tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], + setup->vmid[vertSlot][j], + setup->vmax[vertSlot][j], + spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + v); + tri_persp_coeff(setup, &setup->coef[fragSlot], j, v); + } break; case INTERP_POS: setup_fragcoord_coeff(setup, fragSlot); @@ -776,22 +849,49 @@ void sp_setup_tri( struct setup_context *setup, } +/* Apply cylindrical wrapping to v0, v1 coordinates, if enabled. 
+ * Input coordinates must be in [0, 1] range, otherwise results are undefined. + */ +static void +line_apply_cylindrical_wrap(float v0, + float v1, + uint cylindrical_wrap, + float output[2]) +{ + if (cylindrical_wrap) { + float delta; + + delta = v1 - v0; + if (delta > 0.5f) { + v0 += 1.0f; + } + else if (delta < -0.5f) { + v1 += 1.0f; + } + } + + output[0] = v0; + output[1] = v1; +} + /** * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a line. + * v[0] and v[1] are vmin and vmax, respectively. */ static void line_linear_coeff(const struct setup_context *setup, struct tgsi_interp_coef *coef, - uint vertSlot, uint i) + uint i, + const float v[2]) { - const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + const float da = v[1] - v[0]; const float dadx = da * setup->emaj.dx * setup->oneoverarea; const float dady = da * setup->emaj.dy * setup->oneoverarea; coef->dadx[i] = dadx; coef->dady[i] = dady; - coef->a0[i] = (setup->vmin[vertSlot][i] - + coef->a0[i] = (v[0] - (dadx * (setup->vmin[0][0] - setup->pixel_offset) + dady * (setup->vmin[0][1] - setup->pixel_offset))); } @@ -800,21 +900,22 @@ line_linear_coeff(const struct setup_context *setup, /** * Compute a0, dadx and dady for a perspective-corrected interpolant, * for a line. + * v[0] and v[1] are vmin and vmax, respectively. 
*/ static void line_persp_coeff(const struct setup_context *setup, struct tgsi_interp_coef *coef, - uint vertSlot, uint i) + uint i, + const float v[2]) { - /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + const float a0 = v[0] * setup->vmin[0][3]; + const float a1 = v[1] * setup->vmax[0][3]; const float da = a1 - a0; const float dadx = da * setup->emaj.dx * setup->oneoverarea; const float dady = da * setup->emaj.dy * setup->oneoverarea; coef->dadx[i] = dadx; coef->dady[i] = dady; - coef->a0[i] = (setup->vmin[vertSlot][i] - + coef->a0[i] = (a0 - (dadx * (setup->vmin[0][0] - setup->pixel_offset) + dady * (setup->vmin[0][1] - setup->pixel_offset))); } @@ -834,6 +935,7 @@ setup_line_coefficients(struct setup_context *setup, const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; float area; + float v[2]; /* use setup->vmin, vmax to point to vertices */ if (softpipe->rasterizer->flatshade_first) @@ -854,8 +956,13 @@ setup_line_coefficients(struct setup_context *setup, /* z and w are done by linear interpolation: */ - line_linear_coeff(setup, &setup->posCoef, 0, 2); - line_linear_coeff(setup, &setup->posCoef, 0, 3); + v[0] = setup->vmin[0][2]; + v[1] = setup->vmax[0][2]; + line_linear_coeff(setup, &setup->posCoef, 2, v); + + v[0] = setup->vmin[0][3]; + v[1] = setup->vmax[0][3]; + line_linear_coeff(setup, &setup->posCoef, 3, v); /* setup interpolation for all the remaining attributes: */ @@ -869,12 +976,22 @@ setup_line_coefficients(struct setup_context *setup, const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); break; case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + for (j = 0; j < NUM_CHANNELS; j++) { + line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], + setup->vmax[vertSlot][j], + spfs->info.input_cylindrical_wrap[fragSlot] & (1 
<< j), + v); + line_linear_coeff(setup, &setup->coef[fragSlot], j, v); + } break; case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + for (j = 0; j < NUM_CHANNELS; j++) { + line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], + setup->vmax[vertSlot][j], + spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + v); + line_persp_coeff(setup, &setup->coef[fragSlot], j, v); + } break; case INTERP_POS: setup_fragcoord_coeff(setup, fragSlot); diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 9b18dac67bd..a83cae73617 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -68,6 +68,9 @@ struct sp_fragment_shader { struct tgsi_shader_info info; + boolean origin_lower_left; /**< fragment shader uses lower left position origin? */ + boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */ + void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers); @@ -139,7 +142,7 @@ void softpipe_set_clip_state( struct pipe_context *, void softpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf); + struct pipe_buffer *buf); void *softpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); @@ -200,6 +203,24 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void softpipe_map_transfers(struct 
softpipe_context *sp); void diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f6856a5f691..d2eda7324ca 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -30,7 +30,6 @@ #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" -#include "draw/draw_private.h" #include "sp_context.h" #include "sp_screen.h" #include "sp_state.h" @@ -67,7 +66,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_current_shader_outputs(softpipe->draw); + const uint num = draw_num_shader_outputs(softpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index aa12bb215a8..c88e2137510 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" +#include "util/u_inlines.h" #include "draw/draw_context.h" #include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" @@ -44,6 +45,7 @@ softpipe_create_fs_state(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state; + unsigned i; /* debug */ if (softpipe->dump_fs) @@ -60,6 +62,13 @@ softpipe_create_fs_state(struct pipe_context *pipe, /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &state->info); + for (i = 0; i < state->info.num_properties; ++i) { + if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN) + state->origin_lower_left = state->info.properties[i].data[0]; + else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER) + 
state->pixel_center_integer = state->info.properties[i].data[0]; + } + return state; } @@ -159,18 +168,17 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) void softpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct softpipe_context *softpipe = softpipe_context(pipe); assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); + assert(index < PIPE_MAX_CONSTANT_BUFFERS); draw_flush(softpipe->draw); /* note: reference counting */ - pipe_buffer_reference(&softpipe->constants[shader].buffer, - buf ? buf->buffer : NULL); + pipe_buffer_reference(&softpipe->constants[shader][index], buf); softpipe->dirty |= SP_NEW_CONSTANTS; } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index f6154109ea8..2db6faeca46 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -30,12 +30,12 @@ #include "sp_context.h" #include "sp_state.h" -#include "sp_surface.h" #include "sp_tile_cache.h" #include "draw/draw_context.h" #include "util/u_format.h" +#include "util/u_inlines.h" /** diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 46b6991195d..b491d92ed15 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -31,7 +31,6 @@ #include "sp_context.h" #include "sp_state.h" -#include "sp_surface.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 1ae8fecacf7..473ec3e150a 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -517,7 +517,6 @@ compute_lambda_1d(const struct sp_sampler_varient *samp, const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; - const 
struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float rho = MAX2(dsdx, dsdy) * texture->width0; @@ -533,7 +532,6 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); @@ -553,7 +551,6 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index e50a76a73bc..a0b95c88846 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -32,12 +32,11 @@ * Brian Paul */ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_tile.h" #include "util/u_math.h" #include "sp_context.h" -#include "sp_surface.h" #include "sp_texture.h" #include "sp_tex_tile_cache.h" diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index a9436a33942..371c4e20251 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -31,14 +31,13 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include 
"sp_context.h" -#include "sp_state.h" #include "sp_texture.h" #include "sp_screen.h" #include "sp_winsys.h" @@ -57,13 +56,8 @@ softpipe_texture_layout(struct pipe_screen *screen, unsigned width = pt->width0; unsigned height = pt->height0; unsigned depth = pt->depth0; - unsigned buffer_size = 0; - pt->width0 = width; - pt->height0 = height; - pt->depth0 = depth; - for (level = 0; level <= pt->last_level; level++) { spt->stride[level] = util_format_get_stride(pt->format, width); @@ -296,6 +290,10 @@ softpipe_get_tex_transfer(struct pipe_screen *screen, assert(texture); assert(level <= texture->last_level); + /* make sure the requested region is in the image bounds */ + assert(x + w <= u_minify(texture->width0, level)); + assert(y + h <= u_minify(texture->height0, level)); + spt = CALLOC_STRUCT(softpipe_transfer); if (spt) { struct pipe_transfer *pt = &spt->base; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 112a6fe0cf3..1b50bd7ffe5 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -32,7 +32,7 @@ * Brian Paul */ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_tile.h" diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c index cfa2a0b2f10..d8b5b31e954 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.c +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -26,9 +26,8 @@ **************************************************************************/ #include "sp_video_context.h" -#include <pipe/p_inlines.h> +#include <util/u_inlines.h> #include <util/u_memory.h> -#include "softpipe/sp_winsys.h" #include "softpipe/sp_texture.h" static void @@ -185,17 +184,18 @@ init_pipe_state(struct sp_mpeg12_context *ctx) ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast); 
ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast); - blend.blend_enable = 0; - blend.rgb_func = PIPE_BLEND_ADD; - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_func = PIPE_BLEND_ADD; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.independent_blend_enable = 0; + blend.rt[0].blend_enable = 0; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; blend.logicop_enable = 0; blend.logicop_func = PIPE_LOGICOP_CLEAR; /* Needed to allow color writes to FB, even if blending disabled */ - blend.colormask = PIPE_MASK_RGBA; + blend.rt[0].colormask = PIPE_MASK_RGBA; blend.dither = 0; ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend); ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend); @@ -249,7 +249,7 @@ sp_mpeg12_create(struct pipe_screen *screen, enum pipe_video_profile profile, ctx->base.set_decode_target = sp_mpeg12_set_decode_target; ctx->base.set_csc_matrix = sp_mpeg12_set_csc_matrix; - ctx->pipe = softpipe_create(screen); + ctx->pipe = screen->context_create(screen, NULL); if (!ctx->pipe) { FREE(ctx); return NULL; diff --git a/src/gallium/drivers/softpipe/sp_winsys.c b/src/gallium/drivers/softpipe/sp_winsys.c new file mode 100644 index 00000000000..f6598927d35 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_winsys.c @@ -0,0 +1,245 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * Malloc softpipe winsys. Uses malloc for all memory allocations. + * + * @author Keith Whitwell + * @author Brian Paul + * @author Jose Fonseca + */ + + +#include "util/u_simple_screen.h"/* port to just p_screen */ +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "softpipe/sp_winsys.h" + + +struct st_softpipe_buffer +{ + struct pipe_buffer base; + boolean userBuffer; /** Is this a user-space buffer? 
*/ + void *data; + void *mapped; +}; + + +/** Cast wrapper */ +static INLINE struct st_softpipe_buffer * +st_softpipe_buffer( struct pipe_buffer *buf ) +{ + return (struct st_softpipe_buffer *)buf; +} + + +static void * +st_softpipe_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags) +{ + struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); + st_softpipe_buf->mapped = st_softpipe_buf->data; + return st_softpipe_buf->mapped; +} + + +static void +st_softpipe_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); + st_softpipe_buf->mapped = NULL; +} + + +static void +st_softpipe_buffer_destroy(struct pipe_buffer *buf) +{ + struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf); + + if (oldBuf->data) { + if (!oldBuf->userBuffer) + align_free(oldBuf->data); + + oldBuf->data = NULL; + } + + FREE(oldBuf); +} + + +static void +st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, + struct pipe_surface *surf, + void *context_private) +{ +} + + + +static const char * +st_softpipe_get_name(struct pipe_winsys *winsys) +{ + return "softpipe"; +} + + +static struct pipe_buffer * +st_softpipe_buffer_create(struct pipe_winsys *winsys, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer); + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + + buffer->data = align_malloc(size, alignment); + + return &buffer->base; +} + + +/** + * Create buffer which wraps user-space data. 
+ */ +static struct pipe_buffer * +st_softpipe_user_buffer_create(struct pipe_winsys *winsys, + void *ptr, + unsigned bytes) +{ + struct st_softpipe_buffer *buffer; + + buffer = CALLOC_STRUCT(st_softpipe_buffer); + if(!buffer) + return NULL; + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.size = bytes; + buffer->userBuffer = TRUE; + buffer->data = ptr; + + return &buffer->base; +} + + +static struct pipe_buffer * +st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned tex_usage, + unsigned *stride) +{ + const unsigned alignment = 64; + unsigned nblocksy; + + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); + + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); +} + + +static void +st_softpipe_fence_reference(struct pipe_winsys *winsys, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + + +static int +st_softpipe_fence_signalled(struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static int +st_softpipe_fence_finish(struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static void +st_softpipe_destroy(struct pipe_winsys *winsys) +{ + FREE(winsys); +} + + +struct pipe_screen * +softpipe_create_screen_malloc(void) +{ + static struct pipe_winsys *winsys; + struct pipe_screen *screen; + + winsys = CALLOC_STRUCT(pipe_winsys); + if(!winsys) + return NULL; + + winsys->destroy = st_softpipe_destroy; + + winsys->buffer_create = st_softpipe_buffer_create; + winsys->user_buffer_create = st_softpipe_user_buffer_create; + winsys->buffer_map = st_softpipe_buffer_map; + winsys->buffer_unmap = st_softpipe_buffer_unmap; + winsys->buffer_destroy = st_softpipe_buffer_destroy; + + winsys->surface_buffer_create = st_softpipe_surface_buffer_create; 
+ + winsys->fence_reference = st_softpipe_fence_reference; + winsys->fence_signalled = st_softpipe_fence_signalled; + winsys->fence_finish = st_softpipe_fence_finish; + + winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer; + winsys->get_name = st_softpipe_get_name; + + screen = softpipe_create_screen(winsys); + if(!screen) + st_softpipe_destroy(winsys); + + return screen; +} diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index f203ded29ee..6e3920c49b2 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -47,12 +47,18 @@ struct pipe_texture; struct pipe_buffer; -struct pipe_context *softpipe_create( struct pipe_screen * ); +/** + * Create a softpipe screen that uses the + * given winsys for allocating buffers. + */ +struct pipe_screen *softpipe_create_screen( struct pipe_winsys * ); -struct pipe_screen * -softpipe_create_screen(struct pipe_winsys *); - +/** + * Create a softpipe screen that uses + * regular malloc to create all its buffers. 
+ */ +struct pipe_screen *softpipe_create_screen_malloc(void); boolean softpipe_get_texture_buffer( struct pipe_texture *texture, diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index af99c9de37c..d499ae6acc9 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -26,7 +26,7 @@ #include "svga_cmd.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_screen.h" #include "util/u_memory.h" #include "util/u_bitmask.h" @@ -126,7 +126,8 @@ svga_is_buffer_referenced( struct pipe_context *pipe, } -struct pipe_context *svga_context_create( struct pipe_screen *screen ) +struct pipe_context *svga_context_create( struct pipe_screen *screen, + void *priv ) { struct svga_screen *svgascreen = svga_screen(screen); struct svga_context *svga = NULL; @@ -138,6 +139,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->pipe.winsys = screen->winsys; svga->pipe.screen = screen; + svga->pipe.priv = priv; svga->pipe.destroy = svga_destroy; svga->pipe.clear = svga_clear; @@ -215,7 +217,6 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->state.hw_draw.num_views = 0; svga->dirty = ~0; - svga->state.white_fs_id = SVGA3D_INVALID_ID; LIST_INITHEAD(&svga->dirty_buffers); diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index e2a96034d16..f9a641c6df6 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -326,10 +326,6 @@ struct svga_context unsigned texture_timestamp; - /* Internally generated shaders: - */ - unsigned white_fs_id; - /* */ struct svga_sw_state sw; @@ -429,6 +425,10 @@ void svga_context_flush( struct svga_context *svga, void svga_hwtnl_flush_retry( struct svga_context *svga ); +struct pipe_context * +svga_context_create(struct pipe_screen *screen, + void *priv); + 
/*********************************************************************** * Inline conversion functions. These are better-typed than the diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index ca73cf9d5a3..f4d2d8992c8 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -24,7 +24,7 @@ **********************************************************/ #include "pipe/p_compiler.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_math.h" diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index 75492dffca2..6192aa96b11 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -25,8 +25,7 @@ #include "svga_cmd.h" -#include "pipe/p_inlines.h" -#include "util/u_prim.h" +#include "util/u_inlines.h" #include "indices/u_indices.h" #include "svga_hw_reg.h" diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c index 167d8178315..e8097d82f16 100644 --- a/src/gallium/drivers/svga/svga_draw_elements.c +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -23,8 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" -#include "util/u_prim.h" +#include "util/u_inlines.h" #include "util/u_upload_mgr.h" #include "indices/u_indices.h" diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c index 855d228755f..9dd6fb068c5 100644 --- a/src/gallium/drivers/svga/svga_pipe_blend.c +++ b/src/gallium/drivers/svga/svga_pipe_blend.c @@ -23,13 +23,12 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "svga_context.h" -#include "svga_state.h" 
#include "svga_hw_reg.h" @@ -182,15 +181,15 @@ svga_create_blend_state(struct pipe_context *pipe, } } else { - blend->rt[i].blend_enable = templ->blend_enable; + blend->rt[i].blend_enable = templ->rt[0].blend_enable; - if (templ->blend_enable) { - blend->rt[i].srcblend = svga_translate_blend_factor(templ->rgb_src_factor); - blend->rt[i].dstblend = svga_translate_blend_factor(templ->rgb_dst_factor); - blend->rt[i].blendeq = svga_translate_blend_func(templ->rgb_func); - blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->alpha_src_factor); - blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->alpha_dst_factor); - blend->rt[i].blendeq_alpha = svga_translate_blend_func(templ->alpha_func); + if (templ->rt[0].blend_enable) { + blend->rt[i].srcblend = svga_translate_blend_factor(templ->rt[0].rgb_src_factor); + blend->rt[i].dstblend = svga_translate_blend_factor(templ->rt[0].rgb_dst_factor); + blend->rt[i].blendeq = svga_translate_blend_func(templ->rt[0].rgb_func); + blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_src_factor); + blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_dst_factor); + blend->rt[i].blendeq_alpha = svga_translate_blend_func(templ->rt[0].alpha_func); if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend || blend->rt[i].dstblend_alpha != blend->rt[i].dstblend || @@ -201,7 +200,7 @@ svga_create_blend_state(struct pipe_context *pipe, } } - blend->rt[i].writemask = templ->colormask; + blend->rt[i].writemask = templ->rt[0].colormask; } return blend; diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c index 10e7a121892..73a0cd6b3a8 100644 --- a/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/src/gallium/drivers/svga/svga_pipe_constants.c @@ -23,16 +23,12 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include 
"util/u_math.h" -#include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "svga_context.h" -#include "svga_state.h" -#include "svga_hw_reg.h" -#include "svga_cmd.h" /*********************************************************************** * Constant buffers @@ -49,7 +45,7 @@ struct svga_constbuf static void svga_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct svga_context *svga = svga_context(pipe); @@ -57,7 +53,7 @@ static void svga_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); pipe_buffer_reference( &svga->curr.cb[shader], - buf->buffer ); + buf ); if (shader == PIPE_SHADER_FRAGMENT) svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c index df636c08a05..12bbd233a58 100644 --- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c +++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c @@ -23,13 +23,12 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "svga_context.h" -#include "svga_state.h" #include "svga_hw_reg.h" diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 0f24ef4ee8d..f00cf23935e 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -25,7 +25,7 @@ #include "svga_cmd.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_time.h" #include "indices/u_indices.h" @@ -33,7 +33,6 @@ #include "svga_hw_reg.h" #include "svga_context.h" #include "svga_screen.h" -#include "svga_winsys.h" #include "svga_draw.h" #include "svga_state.h" #include "svga_swtnl.h" @@ -217,11 +216,6 @@ svga_draw_range_elements( struct pipe_context 
*pipe, } if (SVGA_DEBUG & DEBUG_FLUSH) { - static unsigned id; - debug_printf("%s %d\n", __FUNCTION__, id++); - if (id > 1300) - util_time_sleep( 2000 ); - svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); } diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c index 0becb0765ac..7fa2205ae5f 100644 --- a/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -27,14 +27,8 @@ #include "svga_screen.h" #include "svga_screen_texture.h" #include "svga_context.h" -#include "svga_winsys.h" -#include "svga_draw.h" #include "svga_debug.h" -#include "svga_hw_reg.h" - - - static void svga_flush( struct pipe_context *pipe, unsigned flags, diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index 5f1213e46a3..b71bc66552f 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -23,20 +23,17 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_text.h" #include "svga_screen.h" #include "svga_context.h" -#include "svga_state.h" #include "svga_tgsi.h" #include "svga_hw_reg.h" #include "svga_cmd.h" -#include "svga_draw.h" #include "svga_debug.h" diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c index 58cb1e6e230..49b43bebc29 100644 --- a/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -25,14 +25,10 @@ #include "svga_cmd.h" +#include "util/u_inlines.h" + #include "svga_context.h" #include "svga_screen_texture.h" -#include "svga_state.h" -#include "svga_winsys.h" - -#include "svga_hw_reg.h" - - static void svga_set_scissor_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/svga/svga_pipe_query.c 
b/src/gallium/drivers/svga/svga_pipe_query.c index 01336b0a2c3..08283e37317 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -32,7 +32,6 @@ #include "svga_screen.h" #include "svga_screen_buffer.h" #include "svga_winsys.h" -#include "svga_draw.h" #include "svga_debug.h" diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 09ccb718840..35717788677 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -24,13 +24,12 @@ **********************************************************/ #include "draw/draw_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "svga_context.h" -#include "svga_state.h" #include "svga_hw_reg.h" diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 460a101f8c0..b70081343d1 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -23,7 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -32,9 +32,6 @@ #include "svga_context.h" #include "svga_screen_texture.h" -#include "svga_state.h" - -#include "svga_hw_reg.h" #include "svga_debug.h" diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 42f290d162a..ffc0f99565b 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -23,19 +23,14 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" -#include "util/u_memory.h" #include 
"tgsi/tgsi_parse.h" #include "svga_screen.h" #include "svga_screen_buffer.h" #include "svga_context.h" -#include "svga_state.h" -#include "svga_winsys.h" - -#include "svga_hw_reg.h" static void svga_set_vertex_buffers(struct pipe_context *pipe, diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index 7e6ab576add..de8c919e125 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -24,7 +24,7 @@ **********************************************************/ #include "draw/draw_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_bitmask.h" @@ -33,7 +33,6 @@ #include "svga_screen.h" #include "svga_context.h" -#include "svga_state.h" #include "svga_tgsi.h" #include "svga_hw_reg.h" #include "svga_cmd.h" diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 45b5d85ae22..8143be50244 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -24,7 +24,7 @@ **********************************************************/ #include "util/u_memory.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_string.h" #include "util/u_math.h" @@ -33,10 +33,8 @@ #include "svga_screen.h" #include "svga_screen_texture.h" #include "svga_screen_buffer.h" -#include "svga_cmd.h" #include "svga_debug.h" -#include "svga_hw_reg.h" #include "svga3d_shaderdefs.h" @@ -146,6 +144,13 @@ svga_get_paramf(struct pipe_screen *screen, int param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. 
for GL 1.5 */ return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + default: return 0; } @@ -361,6 +366,7 @@ svga_screen_create(struct svga_winsys_screen *sws) screen->get_param = svga_get_param; screen->get_paramf = svga_get_paramf; screen->is_format_supported = svga_is_format_supported; + screen->context_create = svga_context_create; screen->fence_reference = svga_fence_reference; screen->fence_signalled = svga_fence_signalled; screen->fence_finish = svga_fence_finish; @@ -393,8 +399,6 @@ svga_screen_create(struct svga_winsys_screen *sws) pipe_mutex_init(svgascreen->tex_mutex); pipe_mutex_init(svgascreen->swc_mutex); - LIST_INITHEAD(&svgascreen->cached_buffers); - svga_screen_cache_init(svgascreen); return screen; diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h index b94ca7fc1ca..9dc229b0a87 100644 --- a/src/gallium/drivers/svga/svga_screen.h +++ b/src/gallium/drivers/svga/svga_screen.h @@ -28,7 +28,7 @@ #include "pipe/p_screen.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "util/u_double_list.h" @@ -68,12 +68,6 @@ struct svga_screen pipe_mutex tex_mutex; pipe_mutex swc_mutex; /* Protects the use of swc and dirty_buffers */ - /** - * List of buffers with cached GMR. 
Ordered from the most recently used to - * the least recently used - */ - struct list_head cached_buffers; - struct svga_host_surface_cache cache; }; diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c index 58a1aba464b..c9e9bef5406 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.c +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ -27,8 +27,8 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_thread.h" +#include "util/u_inlines.h" +#include "os/os_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -113,68 +113,9 @@ svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf) if(sbuf->hw.buf) { sws->buffer_destroy(sws, sbuf->hw.buf); sbuf->hw.buf = NULL; - assert(sbuf->head.prev && sbuf->head.next); - LIST_DEL(&sbuf->head); -#ifdef DEBUG - sbuf->head.next = sbuf->head.prev = NULL; -#endif } } -static INLINE enum pipe_error -svga_buffer_backup(struct svga_screen *ss, struct svga_buffer *sbuf) -{ - if (sbuf->hw.buf && sbuf->hw.num_ranges) { - void *src; - - if (!sbuf->swbuf) - sbuf->swbuf = align_malloc(sbuf->base.size, sbuf->base.alignment); - if (!sbuf->swbuf) - return PIPE_ERROR_OUT_OF_MEMORY; - - src = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, - PIPE_BUFFER_USAGE_CPU_READ); - if (!src) - return PIPE_ERROR; - - memcpy(sbuf->swbuf, src, sbuf->base.size); - ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf); - } - - return PIPE_OK; -} - -/** - * Try to make GMR space available by freeing the hardware storage of - * unmapped - */ -boolean -svga_buffer_free_cached_hw_storage(struct svga_screen *ss) -{ - struct list_head *curr; - struct svga_buffer *sbuf; - enum pipe_error ret = PIPE_OK; - - curr = ss->cached_buffers.prev; - - /* free the least recently used buffer's hw storage which is not mapped */ - do { - if(curr == &ss->cached_buffers) - return FALSE; - - sbuf = LIST_ENTRY(struct svga_buffer, curr, head); - - curr = 
curr->prev; - if (sbuf->map.count == 0) - ret = svga_buffer_backup(ss, sbuf); - - } while(sbuf->map.count != 0 || ret != PIPE_OK); - - svga_buffer_destroy_hw_storage(ss, sbuf); - - return TRUE; -} - struct svga_winsys_buffer * svga_winsys_buffer_create( struct svga_screen *ss, unsigned alignment, @@ -195,12 +136,6 @@ svga_winsys_buffer_create( struct svga_screen *ss, svga_screen_flush(ss, NULL); buf = sws->buffer_create(sws, alignment, usage, size); - SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "evicting buffers to find %d bytes GMR\n", - size); - - /* Try evicing all buffer storage */ - while(!buf && svga_buffer_free_cached_hw_storage(ss)) - buf = sws->buffer_create(sws, alignment, usage, size); } return buf; @@ -226,8 +161,6 @@ svga_buffer_create_hw_storage(struct svga_screen *ss, return PIPE_ERROR_OUT_OF_MEMORY; assert(!sbuf->needs_flush); - assert(!sbuf->head.prev && !sbuf->head.next); - LIST_ADD(&sbuf->head, &ss->cached_buffers); } return PIPE_OK; @@ -311,7 +244,6 @@ static void svga_buffer_upload_flush(struct svga_context *svga, struct svga_buffer *sbuf) { - struct svga_screen *ss = svga_screen(svga->pipe.screen); SVGA3dCopyBox *boxes; unsigned i; @@ -348,13 +280,16 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(sbuf->head.prev && sbuf->head.next); LIST_DEL(&sbuf->head); +#ifdef DEBUG + sbuf->head.next = sbuf->head.prev = NULL; +#endif sbuf->needs_flush = FALSE; - /* XXX: do we care about cached_buffers any more ?*/ - LIST_ADD(&sbuf->head, &ss->cached_buffers); sbuf->hw.svga = NULL; sbuf->hw.boxes = NULL; + sbuf->host_written = TRUE; + /* Decrement reference count */ pipe_reference(&(sbuf->base.reference), NULL); sbuf = NULL; @@ -437,17 +372,17 @@ svga_buffer_map_range( struct pipe_screen *screen, } else { if(!sbuf->hw.buf) { - struct svga_winsys_surface *handle = sbuf->handle; - if(svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) return NULL; /* Populate the hardware storage if the host surface pre-existed */ - if((usage & PIPE_BUFFER_USAGE_CPU_READ) && 
handle) { + if(sbuf->host_written) { SVGA3dSurfaceDMAFlags flags; enum pipe_error ret; struct pipe_fence_handle *fence = NULL; + assert(sbuf->handle); + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p (buffer), bytes %u - %u\n", sbuf->handle, 0, sbuf->base.size); @@ -478,17 +413,6 @@ svga_buffer_map_range( struct pipe_screen *screen, sws->fence_reference(sws, &fence, NULL); } } - else { - if((usage & PIPE_BUFFER_USAGE_CPU_READ) && !sbuf->needs_flush) { - /* We already had the hardware storage but we would have to issue - * a download if we hadn't, so move the buffer to the begginning - * of the LRU list. - */ - assert(sbuf->head.prev && sbuf->head.next); - LIST_DEL(&sbuf->head); - LIST_ADD(&sbuf->head, &ss->cached_buffers); - } - } map = sws->buffer_map(sws, sbuf->hw.buf, usage); } @@ -572,10 +496,8 @@ svga_buffer_destroy( struct pipe_buffer *buf ) assert(!sbuf->needs_flush); - if(sbuf->handle) { - SVGA_DBG(DEBUG_DMA, "release sid %p sz %d\n", sbuf->handle, sbuf->base.size); - svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle); - } + if(sbuf->handle) + svga_buffer_destroy_host_surface(ss, sbuf); if(sbuf->hw.buf) svga_buffer_destroy_hw_storage(ss, sbuf); @@ -595,6 +517,9 @@ svga_buffer_create(struct pipe_screen *screen, struct svga_screen *ss = svga_screen(screen); struct svga_buffer *sbuf; + assert(size); + assert(alignment); + sbuf = CALLOC_STRUCT(svga_buffer); if(!sbuf) goto error1; @@ -755,8 +680,7 @@ svga_buffer_handle(struct svga_context *svga, assert(sbuf->hw.svga == svga); sbuf->needs_flush = TRUE; - assert(sbuf->head.prev && sbuf->head.next); - LIST_DEL(&sbuf->head); + assert(!sbuf->head.prev && !sbuf->head.next); LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers); } diff --git a/src/gallium/drivers/svga/svga_screen_buffer.h b/src/gallium/drivers/svga/svga_screen_buffer.h index 5d7af5a7c50..448ac107c7f 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.h +++ b/src/gallium/drivers/svga/svga_screen_buffer.h @@ -135,6 +135,11 @@ struct svga_buffer 
*/ struct svga_winsys_surface *handle; + /** + * Whether the host has been ever written. + */ + boolean host_written; + struct { unsigned count; boolean writing; @@ -178,9 +183,6 @@ svga_buffer_handle(struct svga_context *svga, void svga_context_flush_buffers(struct svga_context *svga); -boolean -svga_buffer_free_cached_hw_storage(struct svga_screen *ss); - struct svga_winsys_buffer * svga_winsys_buffer_create(struct svga_screen *ss, unsigned alignment, diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h index f5aa740d408..62156e3f522 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.h +++ b/src/gallium/drivers/svga/svga_screen_cache.h @@ -31,7 +31,7 @@ #include "svga_reg.h" #include "svga3d_reg.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" #include "util/u_double_list.h" diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index 2224c2d3945..12f3531a1df 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -27,8 +27,8 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_thread.h" +#include "util/u_inlines.h" +#include "os/os_thread.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -205,7 +205,7 @@ svga_transfer_dma(struct svga_transfer *st, if(transfer == SVGA3D_READ_HOST_VRAM) { svga_screen_flush(screen, &fence); sws->fence_finish(sws, fence, 0); - //sws->fence_reference(sws, &fence, NULL); + sws->fence_reference(sws, &fence, NULL); } } else { @@ -235,7 +235,7 @@ svga_transfer_dma(struct svga_transfer *st, if(y) { svga_screen_flush(screen, &fence); sws->fence_finish(sws, fence, 0); - //sws->fence_reference(sws, &fence, NULL); + sws->fence_reference(sws, &fence, NULL); } hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_WRITE); @@ -306,11 +306,19 @@ svga_texture_create(struct 
pipe_screen *screen, tex->key.numFaces = 1; } + tex->key.cachable = 1; + if(templat->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; - if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if(templat->tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + tex->key.cachable = 0; + } + + if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) { tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT; + tex->key.cachable = 0; + } /* * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot @@ -333,8 +341,6 @@ svga_texture_create(struct pipe_screen *screen, if(tex->key.format == SVGA3D_FORMAT_INVALID) goto error2; - tex->key.cachable = 1; - SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle); tex->handle = svga_screen_surface_create(svgascreen, &tex->key); if (tex->handle) @@ -416,6 +422,62 @@ svga_texture_blanket(struct pipe_screen * screen, } +struct pipe_texture * +svga_screen_texture_wrap_surface(struct pipe_screen *screen, + struct pipe_texture *base, + enum SVGA3dSurfaceFormat format, + struct svga_winsys_surface *srf) +{ + struct svga_texture *tex; + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth0 != 1) { + return NULL; + } + + if (!srf) + return NULL; + + if (svga_translate_format(base->format) != format) { + unsigned f1 = svga_translate_format(base->format); + unsigned f2 = format; + + /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */ + if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) || + (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) || + (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ) ) { + debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2); + return NULL; + } + } + + tex = CALLOC_STRUCT(svga_texture); + if (!tex) + return NULL; + + tex->base = *base; + + + if (format == 1) + tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM; + else if (format == 2) + tex->base.format = 
PIPE_FORMAT_A8R8G8B8_UNORM; + + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + SVGA_DBG(DEBUG_DMA, "wrap surface sid %p\n", srf); + + tex->key.cachable = 0; + tex->handle = srf; + + return &tex->base; +} + + static void svga_texture_destroy(struct pipe_texture *pt) { diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h index 89ae24219fd..43853d48f88 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.h +++ b/src/gallium/drivers/svga/svga_screen_texture.h @@ -29,6 +29,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" +#include "util/u_inlines.h" #include "svga_screen_cache.h" struct pipe_context; diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index 6b0e511cec1..bb92f818eae 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -23,7 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "svga_context.h" diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index cfdcae4ee4a..b4cafb8f219 100644 --- a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -23,7 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" @@ -32,8 +32,6 @@ #include "svga_cmd.h" #include "svga_debug.h" -#include "svga_hw_reg.h" - /*********************************************************************** * Hardware state update diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index d29f3762d2b..2973444d0ab 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ 
b/src/gallium/drivers/svga/svga_state_fs.c @@ -23,7 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_bitmask.h" @@ -81,8 +81,10 @@ static enum pipe_error compile_fs( struct svga_context *svga, } result->id = util_bitmask_add(svga->fs_bm); - if(result->id == UTIL_BITMASK_INVALID_INDEX) + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto fail; + } ret = SVGA3D_DefineShader(svga->swc, result->id, @@ -106,70 +108,6 @@ fail: return ret; } -/* The blend workaround for simulating logicop xor behaviour requires - * that the incoming fragment color be white. This change achieves - * that by hooking up a hard-wired fragment shader that just emits - * color 1,1,1,1 - * - * This is a slightly incomplete solution as it assumes that the - * actual bound shader has no other effects beyond generating a - * fragment color. In particular shaders containing TEXKIL and/or - * depth-write will not have the correct behaviour, nor will those - * expecting to use alphatest. - * - * These are avoidable issues, but they are not much worse than the - * unavoidable ones associated with this technique, so it's not clear - * how much effort should be expended trying to resolve them - the - * ultimate result will still not be correct in most cases. 
- * - * Shader below was generated with: - * SVGA_DEBUG=tgsi ./mesa/progs/fp/fp-tri white.txt - */ -static int emit_white_fs( struct svga_context *svga ) -{ - int ret = PIPE_ERROR; - - /* ps_3_0 - * def c0, 1.000000, 0.000000, 0.000000, 1.000000 - * mov oC0, c0.x - * end - */ - static const unsigned white_tokens[] = { - 0xffff0300, - 0x05000051, - 0xa00f0000, - 0x3f800000, - 0x00000000, - 0x00000000, - 0x3f800000, - 0x02000001, - 0x800f0800, - 0xa0000000, - 0x0000ffff, - }; - - assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX); - svga->state.white_fs_id = util_bitmask_add(svga->fs_bm); - if(svga->state.white_fs_id == SVGA3D_INVALID_ID) - goto no_fs_id; - - ret = SVGA3D_DefineShader(svga->swc, - svga->state.white_fs_id, - SVGA3D_SHADERTYPE_PS, - white_tokens, - sizeof(white_tokens)); - if (ret) - goto no_definition; - - return 0; - -no_definition: - util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id); - svga->state.white_fs_id = SVGA3D_INVALID_ID; -no_fs_id: - return ret; -} - /* SVGA_NEW_TEXTURE_BINDING * SVGA_NEW_RAST @@ -197,6 +135,23 @@ static int make_fs_key( const struct svga_context *svga, PIPE_WINDING_CW); } + /* The blend workaround for simulating logicop xor behaviour + * requires that the incoming fragment color be white. This change + * achieves that by creating a varient of the current fragment + * shader that overrides all output colors with 1,1,1,1 + * + * This will work for most shaders, including those containing + * TEXKIL and/or depth-write. However, it will break on the + * combination of xor-logicop plus alphatest. + * + * Ultimately, we could implement alphatest in the shader using + * texkil prior to overriding the outgoing fragment color. + * + * SVGA_NEW_BLEND + */ + if (svga->curr.blend->need_white_fragments) { + key->white_fragments = 1; + } /* XXX: want to limit this to the textures that the shader actually * refers to. 
@@ -236,40 +191,29 @@ static int emit_hw_fs( struct svga_context *svga, unsigned id = SVGA3D_INVALID_ID; int ret = 0; + struct svga_fragment_shader *fs = svga->curr.fs; + struct svga_fs_compile_key key; + /* SVGA_NEW_BLEND + * SVGA_NEW_TEXTURE_BINDING + * SVGA_NEW_RAST + * SVGA_NEW_NEED_SWTNL + * SVGA_NEW_SAMPLER */ - if (svga->curr.blend->need_white_fragments) { - if (svga->state.white_fs_id == SVGA3D_INVALID_ID) { - ret = emit_white_fs( svga ); - if (ret) - return ret; - } - id = svga->state.white_fs_id; - } - else { - struct svga_fragment_shader *fs = svga->curr.fs; - struct svga_fs_compile_key key; - - /* SVGA_NEW_TEXTURE_BINDING - * SVGA_NEW_RAST - * SVGA_NEW_NEED_SWTNL - * SVGA_NEW_SAMPLER - */ - ret = make_fs_key( svga, &key ); + ret = make_fs_key( svga, &key ); + if (ret) + return ret; + + result = search_fs_key( fs, &key ); + if (!result) { + ret = compile_fs( svga, fs, &key, &result ); if (ret) return ret; - - result = search_fs_key( fs, &key ); - if (!result) { - ret = compile_fs( svga, fs, &key, &result ); - if (ret) - return ret; - } - - assert (result); - id = result->id; } + assert (result); + id = result->id; + assert(id != SVGA3D_INVALID_ID); if (result != svga->state.hw_draw.fs) { diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index 3c35a8579f7..dd13a89d24d 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -23,7 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c index c582e445245..5ce9b4ef4f9 100644 --- a/src/gallium/drivers/svga/svga_state_rss.c +++ b/src/gallium/drivers/svga/svga_state_rss.c @@ -23,7 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include 
"util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" @@ -31,9 +31,6 @@ #include "svga_state.h" #include "svga_cmd.h" -#include "svga_hw_reg.h" - - struct rs_queue { unsigned rs_count; diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c index b3137945202..17b47859781 100644 --- a/src/gallium/drivers/svga/svga_state_tss.c +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -23,7 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" @@ -33,8 +33,6 @@ #include "svga_state.h" #include "svga_cmd.h" -#include "svga_hw_reg.h" - void svga_cleanup_tss_binding(struct svga_context *svga) { diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index c534308f503..d1066ce13b0 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -23,7 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_upload_mgr.h" diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index ae1e77e7d44..d7999fe53d2 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -23,7 +23,7 @@ * **********************************************************/ -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_format.h" #include "util/u_math.h" @@ -71,7 +71,7 @@ static enum pipe_error compile_vs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret = PIPE_OK; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_vertex_program( vs, key ); if (result == NULL) { @@ -80,8 
+80,10 @@ static enum pipe_error compile_vs( struct svga_context *svga, } result->id = util_bitmask_add(svga->vs_bm); - if(result->id == UTIL_BITMASK_INVALID_INDEX) + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto fail; + } ret = SVGA3D_DefineShader(svga->swc, result->id, @@ -200,10 +202,12 @@ static int update_zero_stride( struct svga_context *svga, key.output_stride = 4 * sizeof(float); key.nr_elements = 1; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].input_format = vel->src_format; key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; key.element[0].input_offset = vel->src_offset; + key.element[0].instance_divisor = vel->instance_divisor; key.element[0].output_offset = const_idx * 4 * sizeof(float); translate_key_sanitize(&key); @@ -222,7 +226,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, vbuffer->stride); - translate->run(translate, 0, 1, + translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); pipe_buffer_unmap(svga->pipe.screen, diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index b4f757a47a9..e9d7942fb57 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -28,10 +28,9 @@ #include "draw/draw_vertex.h" #include "util/u_debug.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "util/u_simple_shaders.h" #include "svga_context.h" #include "svga_state.h" @@ -87,13 +86,13 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render, if (!svga_render->vbuf) { svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size); svga_render->vbuf = pipe_buffer_create(screen, - 0, + 16, PIPE_BUFFER_USAGE_VERTEX, svga_render->vbuf_size); 
if(!svga_render->vbuf) { svga_context_flush(svga, NULL); svga_render->vbuf = pipe_buffer_create(screen, - 0, + 16, PIPE_BUFFER_USAGE_VERTEX, svga_render->vbuf_size); assert(svga_render->vbuf); @@ -123,7 +122,9 @@ svga_vbuf_render_map_vertices( struct vbuf_render *render ) char *ptr = (char*)pipe_buffer_map(screen, svga_render->vbuf, PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | + PIPE_BUFFER_USAGE_DISCARD | + PIPE_BUFFER_USAGE_UNSYNCHRONIZED); return ptr + svga_render->vbuf_offset; } @@ -259,14 +260,14 @@ svga_vbuf_render_draw( struct vbuf_render *render, if (!svga_render->ibuf) { svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size); svga_render->ibuf = pipe_buffer_create(screen, - 0, + 2, PIPE_BUFFER_USAGE_VERTEX, svga_render->ibuf_size); svga_render->ibuf_offset = 0; } - pipe_buffer_write(screen, svga_render->ibuf, - svga_render->ibuf_offset, 2 * nr_indices, indices); + pipe_buffer_write_nooverlap(screen, svga_render->ibuf, + svga_render->ibuf_offset, 2 * nr_indices, indices); /* off to hardware */ diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 7655121bec1..da15be155c8 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -25,9 +25,8 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_state.h" -#include "util/u_memory.h" #include "svga_context.h" #include "svga_swtnl.h" @@ -90,7 +89,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga, PIPE_BUFFER_USAGE_CPU_READ); assert(map); draw_set_mapped_constant_buffer( - draw, PIPE_SHADER_VERTEX, + draw, PIPE_SHADER_VERTEX, 0, map, svga->curr.cb[PIPE_SHADER_VERTEX]->size); } diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index 94b6ccc62dd..35f36a828fd 100644 --- 
a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -25,9 +25,8 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_state.h" -#include "util/u_memory.h" #include "svga_context.h" #include "svga_swtnl.h" diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index 737a2213af5..063c9cf4221 100644 --- a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -49,6 +49,7 @@ struct svga_fs_compile_key { unsigned light_twoside:1; unsigned front_cw:1; + unsigned white_fragments:1; unsigned num_textures:8; unsigned num_unnormalized_coords:8; struct { diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c index 23b3ace7f30..1ae99067610 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c @@ -29,9 +29,6 @@ #include "util/u_memory.h" #include "svga_tgsi_emit.h" -#include "svga_context.h" - - static boolean ps20_input( struct svga_shader_emitter *emit, diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index d1c7336dec4..73102a72a83 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -29,7 +29,6 @@ #include "util/u_memory.h" #include "svga_tgsi_emit.h" -#include "svga_context.h" static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semantic, unsigned *usage, @@ -195,8 +194,19 @@ static boolean ps30_output( struct svga_shader_emitter *emit, switch (semantic.Name) { case TGSI_SEMANTIC_COLOR: - emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, - semantic.Index ); + if (emit->unit == PIPE_SHADER_FRAGMENT && + emit->key.fkey.white_fragments) { + + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + 
emit->temp_col[idx] = emit->output_map[idx]; + emit->true_col[idx] = dst_register( SVGA3DREG_COLOROUT, + semantic.Index ); + } + else { + emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, + semantic.Index ); + } break; case TGSI_SEMANTIC_POSITION: emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index 2557824293e..e8f75485d55 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -79,6 +79,8 @@ struct svga_shader_emitter int ps30_input_count; + int dynamic_branching_level; + boolean in_main_func; boolean created_zero_immediate; @@ -199,6 +201,23 @@ static INLINE boolean emit_op3( struct svga_shader_emitter *emit, } +static INLINE boolean emit_op4( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2, + struct src_register src3) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest ) && + emit_src( emit, src0 ) && + emit_src( emit, src1 ) && + emit_src( emit, src2 ) && + emit_src( emit, src3 )); +} + + #define TRANSLATE_SWIZZLE(x,y,z,w) ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6)) #define SWIZZLE_XYZW \ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_W) diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index dc5eb8fc606..be821e98217 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -46,8 +46,6 @@ translate_opcode( case TGSI_OPCODE_ABS: return SVGA3DOP_ABS; case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC; - case TGSI_OPCODE_DDX: return SVGA3DOP_DSX; - case TGSI_OPCODE_DDY: return SVGA3DOP_DSY; case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case 
TGSI_OPCODE_DP4: return SVGA3DOP_DP4; @@ -415,6 +413,88 @@ static boolean submit_op3( struct svga_shader_emitter *emit, } + + +/* SVGA shaders may not refer to >1 constant register in a single + * instruction. This function checks for that usage and inserts a + * move to temporary if detected. + */ +static boolean submit_op4( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2, + struct src_register src3) +{ + SVGA3dShaderDestToken temp0; + SVGA3dShaderDestToken temp3; + boolean need_temp0 = FALSE; + boolean need_temp3 = FALSE; + SVGA3dShaderRegType type0, type1, type2, type3; + + temp0.value = 0; + temp3.value = 0; + type0 = SVGA3dShaderGetRegType( src0.base.value ); + type1 = SVGA3dShaderGetRegType( src1.base.value ); + type2 = SVGA3dShaderGetRegType( src2.base.value ); + type3 = SVGA3dShaderGetRegType( src2.base.value ); + + /* Make life a little easier - this is only used by the TXD + * instruction which is guaranteed not to have a constant/input reg + * in one slot at least: + */ + assert(type1 == SVGA3DREG_SAMPLER); + + if (type0 == SVGA3DREG_CONST && + ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) || + (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type3 == SVGA3DREG_CONST && + (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num)) + need_temp3 = TRUE; + + if (type0 == SVGA3DREG_INPUT && + ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) || + (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type3 == SVGA3DREG_INPUT && + (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) + need_temp3 = TRUE; + + if (need_temp0) + { + temp0 = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + return FALSE; + + src0 = src( temp0 ); + } + + if (need_temp3) + { + temp3 = get_temp( emit ); 
+ + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 )) + return FALSE; + + src3 = src( temp3 ); + } + + if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 )) + return FALSE; + + if (need_temp3) + release_temp( emit, temp3 ); + if (need_temp0) + release_temp( emit, temp0 ); + return TRUE; +} + + static boolean emit_def_const( struct svga_shader_emitter *emit, SVGA3dShaderConstType type, unsigned idx, @@ -660,6 +740,8 @@ static boolean emit_if(struct svga_shader_emitter *emit, if_token.control = SVGA3DOPCOMPC_NE; zero = scalar(zero, TGSI_SWIZZLE_X); + emit->dynamic_branching_level++; + return (emit_instruction( emit, if_token ) && emit_src( emit, src ) && emit_src( emit, zero ) ); @@ -668,6 +750,8 @@ static boolean emit_if(struct svga_shader_emitter *emit, static boolean emit_endif(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { + emit->dynamic_branching_level--; + return (emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ))); } @@ -1011,10 +1095,10 @@ static boolean emit_kilp(struct svga_shader_emitter *emit, { SVGA3dShaderInstToken inst; SVGA3dShaderDestToken temp; - struct src_register one = get_zero_immediate( emit ); + struct src_register one = scalar( get_zero_immediate( emit ), + TGSI_SWIZZLE_W ); inst = inst_token( SVGA3DOP_TEXKILL ); - one = scalar( one, TGSI_SWIZZLE_W ); /* texkill doesn't allow negation on the operand so lets move * negation of {1} to a temp register */ @@ -1169,41 +1253,79 @@ static boolean emit_tex2(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst ) { SVGA3dShaderInstToken inst; - struct src_register src0; - struct src_register src1; - + struct src_register texcoord; + struct src_register sampler; + SVGA3dShaderDestToken tmp; + inst.value = 0; - inst.op = SVGA3DOP_TEX; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_TEX: + inst.op = SVGA3DOP_TEX; break; case TGSI_OPCODE_TXP: + inst.op = SVGA3DOP_TEX; inst.control = SVGA3DOPCONT_PROJECT; break; case TGSI_OPCODE_TXB: + 
inst.op = SVGA3DOP_TEX; inst.control = SVGA3DOPCONT_BIAS; break; + case TGSI_OPCODE_TXL: + inst.op = SVGA3DOP_TEXLDL; + break; default: assert(0); return FALSE; } - src0 = translate_src_register( emit, &insn->Src[0] ); - src1 = translate_src_register( emit, &insn->Src[1] ); + texcoord = translate_src_register( emit, &insn->Src[0] ); + sampler = translate_src_register( emit, &insn->Src[1] ); - if (emit->key.fkey.tex[src1.base.num].unnormalized) { - struct src_register wh = get_tex_dimensions( emit, src1.base.num ); - SVGA3dShaderDestToken tmp = get_temp( emit ); + if (emit->key.fkey.tex[sampler.base.num].unnormalized || + emit->dynamic_branching_level > 0) + tmp = get_temp( emit ); + + /* Can't do mipmapping inside dynamic branch constructs. Force LOD + * zero in that case. + */ + if (emit->dynamic_branching_level > 0 && + inst.op == SVGA3DOP_TEX && + SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) { + struct src_register zero = get_zero_immediate( emit ); + + /* MOV tmp, texcoord */ + if (!submit_op1( emit, + inst_token( SVGA3DOP_MOV ), + tmp, + texcoord )) + return FALSE; + + /* MOV tmp.w, zero */ + if (!submit_op1( emit, + inst_token( SVGA3DOP_MOV ), + writemask( tmp, TGSI_WRITEMASK_W ), + scalar( zero, TGSI_SWIZZLE_X ))) + return FALSE; + + texcoord = src( tmp ); + inst.op = SVGA3DOP_TEXLDL; + } + + /* Explicit normalization of texcoords: + */ + if (emit->key.fkey.tex[sampler.base.num].unnormalized) { + struct src_register wh = get_tex_dimensions( emit, sampler.base.num ); /* MUL tmp, SRC0, WH */ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), - tmp, src0, wh )) + tmp, texcoord, wh )) return FALSE; - src0 = src( tmp ); + + texcoord = src( tmp ); } - return submit_op2( emit, inst, dst, src0, src1 ); + return submit_op2( emit, inst, dst, texcoord, sampler ); } @@ -1211,31 +1333,33 @@ static boolean emit_tex2(struct svga_shader_emitter *emit, /* Translate texture instructions to SVGA3D representation. 
*/ -static boolean emit_tex3(struct svga_shader_emitter *emit, +static boolean emit_tex4(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn, SVGA3dShaderDestToken dst ) { SVGA3dShaderInstToken inst; - struct src_register src0; - struct src_register src1; - struct src_register src2; + struct src_register texcoord; + struct src_register ddx; + struct src_register ddy; + struct src_register sampler; + + texcoord = translate_src_register( emit, &insn->Src[0] ); + ddx = translate_src_register( emit, &insn->Src[1] ); + ddy = translate_src_register( emit, &insn->Src[2] ); + sampler = translate_src_register( emit, &insn->Src[3] ); inst.value = 0; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_TXD: - inst.op = SVGA3DOP_TEXLDD; - break; - case TGSI_OPCODE_TXL: - inst.op = SVGA3DOP_TEXLDL; + inst.op = SVGA3DOP_TEXLDD; /* 4 args! */ break; + default: + assert(0); + return FALSE; } - src0 = translate_src_register( emit, &insn->Src[0] ); - src1 = translate_src_register( emit, &insn->Src[1] ); - src2 = translate_src_register( emit, &insn->Src[2] ); - - return submit_op3( emit, inst, dst, src0, src1, src2 ); + return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy ); } @@ -1271,12 +1395,12 @@ static boolean emit_tex(struct svga_shader_emitter *emit, case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXL: if (!emit_tex2( emit, insn, tex_result )) return FALSE; break; - case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXD: - if (!emit_tex3( emit, insn, tex_result )) + if (!emit_tex4( emit, insn, tex_result )) return FALSE; break; default: @@ -1330,6 +1454,8 @@ static boolean emit_bgnloop2( struct svga_shader_emitter *emit, struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 ); struct src_register const_int = get_loop_const( emit ); + emit->dynamic_branching_level++; + return (emit_instruction( emit, inst ) && emit_src( emit, loop_reg ) && emit_src( emit, const_int ) ); @@ -1339,6 +1465,9 @@ static boolean 
emit_endloop2( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn ) { SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP ); + + emit->dynamic_branching_level--; + return emit_instruction( emit, inst ); } @@ -1398,6 +1527,46 @@ static boolean emit_simple_instruction(struct svga_shader_emitter *emit, } } + +static boolean emit_deriv(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + if (emit->dynamic_branching_level > 0 && + insn->Src[0].Register.File == TGSI_FILE_TEMPORARY) + { + struct src_register zero = get_zero_immediate( emit ); + SVGA3dShaderDestToken dst = + translate_dst_register( emit, insn, 0 ); + + /* Deriv opcodes not valid inside dynamic branching, workaround + * by zeroing out the destination. + */ + if (!submit_op1(emit, + inst_token( SVGA3DOP_MOV ), + dst, + scalar(zero, TGSI_SWIZZLE_X))) + return FALSE; + + return TRUE; + } + else { + unsigned opcode; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_DDX: + opcode = SVGA3DOP_DSX; + break; + case TGSI_OPCODE_DDY: + opcode = SVGA3DOP_DSY; + break; + default: + return FALSE; + } + + return emit_simple_instruction( emit, opcode, insn ); + } +} + static boolean emit_arl(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { @@ -2002,6 +2171,10 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_TXD: return emit_tex( emit, insn ); + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + return emit_deriv( emit, insn ); + case TGSI_OPCODE_BGNSUB: return emit_bgnsub( emit, position, insn ); @@ -2254,11 +2427,28 @@ static boolean emit_ps_postamble( struct svga_shader_emitter *emit ) for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) { - if (!submit_op1( emit, - inst_token(SVGA3DOP_MOV), - emit->true_col[i], - src(emit->temp_col[i]) )) - return FALSE; + /* Potentially override output colors with white for XOR + * logicop workaround. 
+ */ + if (emit->unit == PIPE_SHADER_FRAGMENT && + emit->key.fkey.white_fragments) { + + struct src_register one = scalar( get_zero_immediate( emit ), + TGSI_SWIZZLE_W ); + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_col[i], + one )) + return FALSE; + } + else { + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_col[i], + src(emit->temp_col[i]) )) + return FALSE; + } } } @@ -2467,6 +2657,9 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) if (emit->key.fkey.light_twoside) return TRUE; + if (emit->key.fkey.white_fragments) + return TRUE; + if (emit->emit_frontface) return TRUE; @@ -2476,6 +2669,10 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) } if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_BGNFOR] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || @@ -2702,6 +2899,8 @@ boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit, goto done; } + assert(emit->dynamic_branching_level == 0); + /* Need to terminate the whole shader: */ ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) ); diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index 59f299c1858..b4e3af0eafc 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -272,9 +272,6 @@ struct svga_winsys_screen }; -struct pipe_context * -svga_context_create(struct pipe_screen *screen); - struct pipe_screen * svga_screen_create(struct svga_winsys_screen *sws); @@ -296,4 +293,10 @@ svga_screen_buffer_from_texture(struct pipe_texture *texture, struct pipe_buffer **buffer, unsigned *stride); +struct pipe_texture * +svga_screen_texture_wrap_surface(struct 
pipe_screen *screen, + struct pipe_texture *base, + enum SVGA3dSurfaceFormat format, + struct svga_winsys_surface *srf); + #endif /* SVGA_WINSYS_H_ */ diff --git a/src/gallium/drivers/trace/tr_buffer.c b/src/gallium/drivers/trace/tr_buffer.c index 4f0eff6a5a4..fa2ac068ebc 100644 --- a/src/gallium/drivers/trace/tr_buffer.c +++ b/src/gallium/drivers/trace/tr_buffer.c @@ -26,6 +26,7 @@ **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_simple_list.h" diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 075e4f9a0b2..34ceaa41c15 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -812,13 +812,13 @@ trace_context_set_clip_state(struct pipe_context *_pipe, static INLINE void trace_context_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_constant_buffer *buffer) + struct pipe_buffer *buffer) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; if (buffer) - trace_screen_user_buffer_update(_pipe->screen, buffer->buffer); + trace_screen_user_buffer_update(_pipe->screen, buffer); trace_dump_call_begin("pipe_context", "set_constant_buffer"); @@ -827,10 +827,11 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe, trace_dump_arg(uint, index); trace_dump_arg(constant_buffer, buffer); + /* XXX hmm? 
*/ if (buffer) { - struct pipe_constant_buffer _buffer; - _buffer.buffer = trace_buffer_unwrap(tr_ctx, buffer->buffer); - pipe->set_constant_buffer(pipe, shader, index, &_buffer); + struct pipe_buffer *_buffer; + _buffer = trace_buffer_unwrap(tr_ctx, buffer); + pipe->set_constant_buffer(pipe, shader, index, _buffer); } else { pipe->set_constant_buffer(pipe, shader, index, buffer); } @@ -1235,12 +1236,10 @@ static const struct debug_named_value rbug_blocker_flags[] = { }; struct pipe_context * -trace_context_create(struct pipe_screen *_screen, +trace_context_create(struct trace_screen *tr_scr, struct pipe_context *pipe) { - struct trace_screen *tr_scr; struct trace_context *tr_ctx; - struct pipe_screen *screen; if(!pipe) goto error1; @@ -1248,13 +1247,13 @@ trace_context_create(struct pipe_screen *_screen, if(!trace_enabled()) goto error1; - tr_scr = trace_screen(_screen); - screen = tr_scr->screen; - tr_ctx = CALLOC_STRUCT(trace_context); if(!tr_ctx) goto error1; + tr_ctx->base.winsys = NULL; + tr_ctx->base.priv = pipe->priv; /* expose wrapped priv data */ + tr_ctx->base.screen = &tr_scr->base; tr_ctx->draw_blocker = debug_get_flags_option("RBUG_BLOCK", rbug_blocker_flags, 0); @@ -1263,8 +1262,6 @@ trace_context_create(struct pipe_screen *_screen, pipe_mutex_init(tr_ctx->list_mutex); make_empty_list(&tr_ctx->shaders); - tr_ctx->base.winsys = _screen->winsys; - tr_ctx->base.screen = _screen; tr_ctx->base.destroy = trace_context_destroy; tr_ctx->base.draw_arrays = trace_context_draw_arrays; tr_ctx->base.draw_elements = trace_context_draw_elements; @@ -1315,11 +1312,6 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->pipe = pipe; - trace_dump_call_begin("", "pipe_context_create"); - trace_dump_arg(ptr, screen); - trace_dump_ret(ptr, pipe); - trace_dump_call_end(); - trace_screen_add_to_list(tr_scr, contexts, tr_ctx); return &tr_ctx->base; diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 
852b480765a..14284232485 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -40,6 +40,8 @@ extern "C" { #endif +struct trace_screen; + struct trace_context { struct pipe_context base; @@ -95,9 +97,8 @@ trace_context(struct pipe_context *pipe) } - struct pipe_context * -trace_context_create(struct pipe_screen *screen, +trace_context_create(struct trace_screen *tr_scr, struct pipe_context *pipe); void diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index 48d1c4051cc..919dc1b309f 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -65,24 +65,6 @@ trace_drm_create_screen(struct drm_api *_api, int fd, return trace_screen_create(screen); } -static struct pipe_context * -trace_drm_create_context(struct drm_api *_api, - struct pipe_screen *_screen) -{ - struct trace_screen *tr_screen = trace_screen(_screen); - struct trace_drm_api *tr_api = trace_drm_api(_api); - struct pipe_screen *screen = tr_screen->screen; - struct drm_api *api = tr_api->api; - struct pipe_context *pipe; - - /* TODO trace call */ - - pipe = api->create_context(api, screen); - - pipe = trace_context_create(_screen, pipe); - - return pipe; -} static struct pipe_texture * trace_drm_texture_from_shared_handle(struct drm_api *_api, @@ -173,8 +155,8 @@ trace_drm_create(struct drm_api *api) if (!tr_api) goto error; + tr_api->base.driver_name = api->driver_name; tr_api->base.create_screen = trace_drm_create_screen; - tr_api->base.create_context = trace_drm_create_context; tr_api->base.texture_from_shared_handle = trace_drm_texture_from_shared_handle; tr_api->base.shared_handle_from_texture = trace_drm_shared_handle_from_texture; tr_api->base.local_handle_from_texture = trace_drm_local_handle_from_texture; diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 0f45e211a32..8de451c22ce 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ 
b/src/gallium/drivers/trace/tr_dump.c @@ -45,11 +45,11 @@ #endif #include "pipe/p_compiler.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" +#include "os/os_stream.h" #include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_string.h" -#include "util/u_stream.h" #include "tr_dump.h" #include "tr_screen.h" @@ -57,7 +57,7 @@ #include "tr_buffer.h" -static struct util_stream *stream = NULL; +static struct os_stream *stream = NULL; static unsigned refcount = 0; static pipe_mutex call_mutex; static long unsigned call_no = 0; @@ -69,7 +69,7 @@ static INLINE void trace_dump_write(const char *buf, size_t size) { if(stream) - util_stream_write(stream, buf, size); + os_stream_write(stream, buf, size); } @@ -220,7 +220,7 @@ trace_dump_trace_close(void) { if(stream) { trace_dump_writes("</trace>\n"); - util_stream_close(stream); + os_stream_close(stream); stream = NULL; refcount = 0; call_no = 0; @@ -250,7 +250,7 @@ boolean trace_dump_trace_begin() if(!stream) { - stream = util_stream_create(filename, 0); + stream = os_stream_create(filename, 0); if(!stream) return FALSE; @@ -367,7 +367,7 @@ void trace_dump_call_end_locked(void) trace_dump_indent(1); trace_dump_tag_end("call"); trace_dump_newline(); - util_stream_flush(stream); + os_stream_flush(stream); } void trace_dump_call_begin(const char *klass, const char *method) diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 720b6cd1ffa..6648539a0fa 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -49,7 +49,7 @@ static void trace_dump_reference(const struct pipe_reference *reference) return; trace_dump_struct_begin("pipe_reference"); - trace_dump_member(int, &reference->count, count); + trace_dump_member(int, reference, count); trace_dump_struct_end(); } @@ -227,7 +227,7 @@ void trace_dump_clip_state(const struct pipe_clip_state *state) } -void trace_dump_constant_buffer(const struct 
pipe_constant_buffer *state) +void trace_dump_constant_buffer(const struct pipe_buffer *state) { if (!trace_dumping_enabled_locked()) return; @@ -239,7 +239,7 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) trace_dump_struct_begin("pipe_constant_buffer"); - trace_dump_member(buffer_ptr, state, buffer); + trace_dump_reference(&state->reference); trace_dump_struct_end(); } @@ -321,9 +321,23 @@ void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_ trace_dump_struct_end(); } +static void trace_dump_rt_blend_state(const struct pipe_rt_blend_state *state) +{ + trace_dump_member(uint, state, rgb_func); + trace_dump_member(uint, state, rgb_src_factor); + trace_dump_member(uint, state, rgb_dst_factor); + + trace_dump_member(uint, state, alpha_func); + trace_dump_member(uint, state, alpha_src_factor); + trace_dump_member(uint, state, alpha_dst_factor); + + trace_dump_member(uint, state, colormask); + +} void trace_dump_blend_state(const struct pipe_blend_state *state) { + unsigned valid_entries = 1; if (!trace_dumping_enabled_locked()) return; @@ -334,21 +348,17 @@ void trace_dump_blend_state(const struct pipe_blend_state *state) trace_dump_struct_begin("pipe_blend_state"); - trace_dump_member(bool, state, blend_enable); - - trace_dump_member(uint, state, rgb_func); - trace_dump_member(uint, state, rgb_src_factor); - trace_dump_member(uint, state, rgb_dst_factor); - - trace_dump_member(uint, state, alpha_func); - trace_dump_member(uint, state, alpha_src_factor); - trace_dump_member(uint, state, alpha_dst_factor); + trace_dump_member(bool, state, dither); trace_dump_member(bool, state, logicop_enable); trace_dump_member(uint, state, logicop_func); - trace_dump_member(uint, state, colormask); - trace_dump_member(bool, state, dither); + trace_dump_member(bool, state, independent_blend_enable); + + if (state->independent_blend_enable) + valid_entries = PIPE_MAX_COLOR_BUFS; + + trace_dump_struct_array(rt_blend_state, 
state->rt, valid_entries); trace_dump_struct_end(); } @@ -410,7 +420,6 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state) trace_dump_member(uint, state, compare_mode); trace_dump_member(uint, state, compare_func); trace_dump_member(bool, state, normalized_coords); - trace_dump_member(uint, state, prefilter); trace_dump_member(float, state, lod_bias); trace_dump_member(float, state, min_lod); trace_dump_member(float, state, max_lod); diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 07ad6fbb205..c7860fd6e18 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -47,7 +47,7 @@ void trace_dump_scissor_state(const struct pipe_scissor_state *state); void trace_dump_clip_state(const struct pipe_clip_state *state); -void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); +void trace_dump_constant_buffer(const struct pipe_buffer *state); void trace_dump_token(const struct tgsi_token *token); diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index 0546aad9b50..691b83c63fb 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -26,11 +26,13 @@ **************************************************************************/ +#include "os/os_thread.h" #include "util/u_format.h" #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_simple_list.h" #include "util/u_network.h" +#include "util/u_time.h" #include "tgsi/tgsi_parse.h" @@ -43,15 +45,6 @@ #include <errno.h> -#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) -# define sleep Sleep -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_APPLE) -void usleep(int); -# define sleep usleep -#else -# warning "No socket implementation" -#endif - #define U642VOID(x) ((void *)(unsigned long)(x)) #define VOID2U64(x) ((uint64_t)(unsigned long)(x)) @@ -805,7 +798,7 @@ PIPE_THREAD_ROUTINE(trace_rbug_thread, void_tr_rbug) 
debug_printf("trace_rbug - remote debugging listening on port %u\n", --port); while(tr_rbug->running) { - sleep(1); + util_time_sleep(1); c = u_socket_accept(s); if (c < 0) diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 117503aaff6..388d83eb5c2 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -33,9 +33,10 @@ #include "tr_dump.h" #include "tr_dump_state.h" #include "tr_texture.h" +#include "tr_context.h" #include "tr_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_format.h" @@ -159,6 +160,29 @@ trace_screen_is_format_supported(struct pipe_screen *_screen, } +static struct pipe_context * +trace_screen_context_create(struct pipe_screen *_screen, void *priv) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct pipe_context *result; + + trace_dump_call_begin("pipe_screen", "context_create"); + + trace_dump_arg(ptr, screen); + + result = screen->context_create(screen, priv); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + result = trace_context_create(tr_scr, result); + + return result; +} + + static void trace_screen_flush_frontbuffer(struct pipe_screen *_screen, struct pipe_surface *_surface, @@ -904,6 +928,8 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.get_param = trace_screen_get_param; tr_scr->base.get_paramf = trace_screen_get_paramf; tr_scr->base.is_format_supported = trace_screen_is_format_supported; + assert(screen->context_create); + tr_scr->base.context_create = trace_screen_context_create; tr_scr->base.texture_create = trace_screen_texture_create; tr_scr->base.texture_blanket = trace_screen_texture_blanket; tr_scr->base.texture_destroy = trace_screen_texture_destroy; diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h index dba8cd7c653..fe5a0fa1909 100644 --- 
a/src/gallium/drivers/trace/tr_screen.h +++ b/src/gallium/drivers/trace/tr_screen.h @@ -30,7 +30,7 @@ #include "pipe/p_screen.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" #ifdef __cplusplus diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 1f25fe38d4c..5321d68ec0c 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_hash_table.h" #include "util/u_memory.h" #include "util/u_simple_list.h" diff --git a/src/gallium/include/pipe/internal/p_winsys_screen.h b/src/gallium/include/pipe/internal/p_winsys_screen.h deleted file mode 100644 index a1542dada70..00000000000 --- a/src/gallium/include/pipe/internal/p_winsys_screen.h +++ /dev/null @@ -1,190 +0,0 @@ - /************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * This is the interface that Gallium3D requires any window system - * hosting it to implement. This is the only include file in Gallium3D - * which is public. - */ - -#ifndef P_WINSYS_H -#define P_WINSYS_H - - -#include "pipe/p_format.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -/** Opaque type */ -struct pipe_fence_handle; - -struct pipe_surface; - - -/** - * Gallium3D drivers are (meant to be!) independent of both GL and the - * window system. The window system provides a buffer manager and a - * set of additional hooks for things like command buffer submission, - * etc. - * - * There clearly has to be some agreement between the window system - * driver and the hardware driver about the format of command buffers, - * etc. - */ -struct pipe_winsys -{ - void (*destroy)( struct pipe_winsys *ws ); - - /** Returns name of this winsys interface */ - const char *(*get_name)( struct pipe_winsys *ws ); - - /** - * Do any special operations to ensure buffer size is correct - */ - void (*update_buffer)( struct pipe_winsys *ws, - void *context_private ); - /** - * Do any special operations to ensure frontbuffer contents are - * displayed, eg copy fake frontbuffer. - */ - void (*flush_frontbuffer)( struct pipe_winsys *ws, - struct pipe_surface *surf, - void *context_private ); - - - /** - * Buffer management. Buffer attributes are mostly fixed over its lifetime. - * - * Remember that gallium gets to choose the interface it needs, and the - * window systems must then implement that interface (rather than the - * other way around...). 
- * - * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This - * usage argument is only an optimization hint, not a guarantee, therefore - * proper behavior must be observed in all circumstances. - * - * alignment indicates the client's alignment requirements, eg for - * SSE instructions. - */ - struct pipe_buffer *(*buffer_create)( struct pipe_winsys *ws, - unsigned alignment, - unsigned usage, - unsigned size ); - - /** - * Create a buffer that wraps user-space data. - * - * Effectively this schedules a delayed call to buffer_create - * followed by an upload of the data at *some point in the future*, - * or perhaps never. Basically the allocate/upload is delayed - * until the buffer is actually passed to hardware. - * - * The intention is to provide a quick way to turn regular data - * into a buffer, and secondly to avoid a copy operation if that - * data subsequently turns out to be only accessed by the CPU. - * - * Common example is OpenGL vertex buffers that are subsequently - * processed either by software TNL in the driver or by passing to - * hardware. - * - * XXX: What happens if the delayed call to buffer_create() fails? - * - * Note that ptr may be accessed at any time upto the time when the - * buffer is destroyed, so the data must not be freed before then. - */ - struct pipe_buffer *(*user_buffer_create)(struct pipe_winsys *ws, - void *ptr, - unsigned bytes); - - /** - * Allocate storage for a display target surface. - * - * Often surfaces which are meant to be blitted to the front screen (i.e., - * display targets) must be allocated with special characteristics, memory - * pools, or obtained directly from the windowing system. - * - * This callback is invoked by the pipe_screenwhen creating a texture marked - * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying - * buffer storage. 
- */ - struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride); - - - /** - * Map the entire data store of a buffer object into the client's address. - * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. - */ - void *(*buffer_map)( struct pipe_winsys *ws, - struct pipe_buffer *buf, - unsigned usage ); - - void (*buffer_unmap)( struct pipe_winsys *ws, - struct pipe_buffer *buf ); - - void (*buffer_destroy)( struct pipe_buffer *buf ); - - - /** Set ptr = fence, with reference counting */ - void (*fence_reference)( struct pipe_winsys *ws, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence ); - - /** - * Checks whether the fence has been signalled. - * \param flags driver-specific meaning - * \return zero on success. - */ - int (*fence_signalled)( struct pipe_winsys *ws, - struct pipe_fence_handle *fence, - unsigned flag ); - - /** - * Wait for the fence to finish. - * \param flags driver-specific meaning - * \return zero on success. - */ - int (*fence_finish)( struct pipe_winsys *ws, - struct pipe_fence_handle *fence, - unsigned flag ); - -}; - -#ifdef __cplusplus -} -#endif - -#endif /* P_WINSYS_H */ diff --git a/src/gallium/include/pipe/p_atomic.h b/src/gallium/include/pipe/p_atomic.h deleted file mode 100644 index 0c3fbae428c..00000000000 --- a/src/gallium/include/pipe/p_atomic.h +++ /dev/null @@ -1,353 +0,0 @@ -/** - * Many similar implementations exist. See for example libwsbm - * or the linux kernel include/atomic.h - * - * No copyright claimed on this file. - * - */ - -#ifndef P_ATOMIC_H -#define P_ATOMIC_H - -#include "p_compiler.h" -#include "p_defines.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -/* Favor OS-provided implementations. 
- * - * Where no OS-provided implementation is available, fall back to - * locally coded assembly, compiler intrinsic or ultimately a - * mutex-based implementation. - */ -#if (defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || \ - defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)) -#define PIPE_ATOMIC_OS_UNLOCKED -#elif (defined(PIPE_CC_MSVC) && defined(PIPE_SUBSYSTEM_WINDOWS_USER)) -#define PIPE_ATOMIC_OS_MS_INTERLOCK -#elif (defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)) -#define PIPE_ATOMIC_ASM_MSVC_X86 -#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)) -#define PIPE_ATOMIC_ASM_GCC_X86 -#elif defined(PIPE_CC_GCC) -#define PIPE_ATOMIC_GCC_INTRINSIC -#else -#define PIPE_ATOMIC_MUTEX -#endif - - - -#if defined(PIPE_ATOMIC_ASM_GCC_X86) - -#define PIPE_ATOMIC "GCC x86 assembly" - -struct pipe_atomic { - int32_t count; -}; - -#define p_atomic_set(_v, _i) ((_v)->count = (_i)) -#define p_atomic_read(_v) ((_v)->count) - - -static INLINE boolean -p_atomic_dec_zero(struct pipe_atomic *v) -{ - unsigned char c; - - __asm__ __volatile__("lock; decl %0; sete %1":"+m"(v->count), "=qm"(c) - ::"memory"); - - return c != 0; -} - -static INLINE void -p_atomic_inc(struct pipe_atomic *v) -{ - __asm__ __volatile__("lock; incl %0":"+m"(v->count)); -} - -static INLINE void -p_atomic_dec(struct pipe_atomic *v) -{ - __asm__ __volatile__("lock; decl %0":"+m"(v->count)); -} - -static INLINE int32_t -p_atomic_cmpxchg(struct pipe_atomic *v, int32_t old, int32_t _new) -{ - return __sync_val_compare_and_swap(&v->count, old, _new); -} -#endif - - - -/* Implementation using GCC-provided synchronization intrinsics - */ -#if defined(PIPE_ATOMIC_GCC_INTRINSIC) - -#define PIPE_ATOMIC "GCC Sync Intrinsics" - -struct pipe_atomic { - int32_t count; -}; - -#define p_atomic_set(_v, _i) ((_v)->count = (_i)) -#define p_atomic_read(_v) ((_v)->count) - - -static INLINE boolean -p_atomic_dec_zero(struct pipe_atomic *v) -{ - return (__sync_sub_and_fetch(&v->count, 1) == 0); -} - -static INLINE void -p_atomic_inc(struct 
pipe_atomic *v) -{ - (void) __sync_add_and_fetch(&v->count, 1); -} - -static INLINE void -p_atomic_dec(struct pipe_atomic *v) -{ - (void) __sync_sub_and_fetch(&v->count, 1); -} - -static INLINE int32_t -p_atomic_cmpxchg(struct pipe_atomic *v, int32_t old, int32_t _new) -{ - return __sync_val_compare_and_swap(&v->count, old, _new); -} -#endif - - - -/* Unlocked version for single threaded environments, such as some - * windows kernel modules. - */ -#if defined(PIPE_ATOMIC_OS_UNLOCKED) - -#define PIPE_ATOMIC "Unlocked" - -struct pipe_atomic -{ - int32_t count; -}; - -#define p_atomic_set(_v, _i) ((_v)->count = (_i)) -#define p_atomic_read(_v) ((_v)->count) -#define p_atomic_dec_zero(_v) ((boolean) --(_v)->count) -#define p_atomic_inc(_v) ((void) (_v)->count++) -#define p_atomic_dec(_v) ((void) (_v)->count--) -#define p_atomic_cmpxchg(_v, old, _new) ((_v)->count == old ? (_v)->count = (_new) : (_v)->count) - -#endif - - -/* Locally coded assembly for MSVC on x86: - */ -#if defined(PIPE_ATOMIC_ASM_MSVC_X86) - -#define PIPE_ATOMIC "MSVC x86 assembly" - -struct pipe_atomic -{ - int32_t count; -}; - -#define p_atomic_set(_v, _i) ((_v)->count = (_i)) -#define p_atomic_read(_v) ((_v)->count) - -static INLINE boolean -p_atomic_dec_zero(struct pipe_atomic *v) -{ - int32_t *pcount = &v->count; - unsigned char c; - - __asm { - mov eax, [pcount] - lock dec dword ptr [eax] - sete byte ptr [c] - } - - return c != 0; -} - -static INLINE void -p_atomic_inc(struct pipe_atomic *v) -{ - int32_t *pcount = &v->count; - - __asm { - mov eax, [pcount] - lock inc dword ptr [eax] - } -} - -static INLINE void -p_atomic_dec(struct pipe_atomic *v) -{ - int32_t *pcount = &v->count; - - __asm { - mov eax, [pcount] - lock dec dword ptr [eax] - } -} - -static INLINE int32_t -p_atomic_cmpxchg(struct pipe_atomic *v, int32_t old, int32_t _new) -{ - int32_t *pcount = &v->count; - int32_t orig; - - __asm { - mov ecx, [pcount] - mov eax, [old] - mov edx, [_new] - lock cmpxchg [ecx], edx - mov [orig], eax 
- } - - return orig; -} -#endif - - -#if defined(PIPE_ATOMIC_OS_MS_INTERLOCK) - -#define PIPE_ATOMIC "MS userspace interlocks" - -#include <windows.h> - -struct pipe_atomic -{ - volatile long count; -}; - -#define p_atomic_set(_v, _i) ((_v)->count = (_i)) -#define p_atomic_read(_v) ((_v)->count) - -static INLINE boolean -p_atomic_dec_zero(struct pipe_atomic *v) -{ - return InterlockedDecrement(&v->count) == 0; -} - -static INLINE void -p_atomic_inc(struct pipe_atomic *v) -{ - InterlockedIncrement(&v->count); -} - -static INLINE void -p_atomic_dec(struct pipe_atomic *v) -{ - InterlockedDecrement(&v->count); -} - -static INLINE int32_t -p_atomic_cmpxchg(struct pipe_atomic *v, int32_t old, int32_t _new) -{ - return InterlockedCompareExchange(&v->count, _new, old); -} - -#endif - - - -#if defined(PIPE_ATOMIC_MUTEX) - -#define PIPE_ATOMIC "mutex-based fallback" - -#include "pipe/p_thread.h" - -/** - * This implementation should really not be used. - * Add an assembly port instead. It may abort and - * doesn't destroy used mutexes. 
- */ - -struct pipe_atomic { - pipe_mutex mutex; - int32_t count; -}; - -static INLINE void -p_atomic_set(struct pipe_atomic *v, int32_t i) -{ - pipe_mutex_init(v->mutex); - pipe_mutex_lock(v->mutex); - v->count = i; - pipe_mutex_unlock(v->mutex); -} - -static INLINE int32_t -p_atomic_read(struct pipe_atomic *v) -{ - int32_t ret; - - pipe_mutex_lock(v->mutex); - ret = v->count; - pipe_mutex_unlock(v->mutex); - return ret; -} - -static INLINE void -p_atomic_inc(struct pipe_atomic *v) -{ - pipe_mutex_lock(v->mutex); - ++v->count; - pipe_mutex_unlock(v->mutex); -} - -static INLINE void -p_atomic_dec(struct pipe_atomic *v) -{ - pipe_mutex_lock(v->mutex); - --v->count; - pipe_mutex_unlock(v->mutex); -} - -static INLINE boolean -p_atomic_dec_zero(struct pipe_atomic *v) -{ - boolean ret; - - pipe_mutex_lock(v->mutex); - ret = (--v->count == 0); - pipe_mutex_unlock(v->mutex); - return ret; -} - -static INLINE int32_t -p_atomic_cmpxchg(struct pipe_atomic *v, int32_t old, int32_t _new) -{ - int32_t ret; - - pipe_mutex_lock(v->mutex); - ret = v->count; - if (ret == old) - v->count = _new; - pipe_mutex_unlock(v->mutex); - - return ret; -} - -#endif - - -#ifndef PIPE_ATOMIC -#error "No pipe_atomic implementation selected" -#endif - - - -#ifdef __cplusplus -} -#endif - -#endif /* P_ATOMIC_H */ diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 26a940593f0..c7d35074948 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -38,6 +38,8 @@ #include "xf86_ansic.h" #include "xf86_libc.h" #endif +#include <stddef.h> +#include <stdarg.h> #if defined(_WIN32) && !defined(__WIN32__) @@ -63,7 +65,7 @@ #include <stdbool.h> -#ifndef __HAIKU__ +#if !defined(__HAIKU__) && !defined(__USE_MISC) typedef unsigned int uint; typedef unsigned short ushort; #endif @@ -104,7 +106,8 @@ typedef unsigned char boolean; /* Function visibility */ #ifndef PUBLIC -# if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) 
>= 303 +# if (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \ + || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PUBLIC __attribute__((visibility("default"))) # else # define PUBLIC @@ -124,6 +127,9 @@ typedef unsigned char boolean; # define __FUNCTION__ "<unknown>" # endif # endif +# if defined(_MSC_VER) && _MSC_VER < 1300 +# define __FUNCTION__ "<unknown>" +# endif #endif @@ -139,22 +145,48 @@ typedef unsigned char boolean; -#if defined(__GNUC__) -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) -#define ALIGN16_ASSIGN(NAME) NAME##___aligned -#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) -#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) +#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +#define PIPE_DEPRECATED __attribute__((__deprecated__)) +#else +#define PIPE_DEPRECATED +#endif + + + +/* Macros for data alignment. */ +#if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Type-Attributes.html */ +#define PIPE_ALIGN_TYPE(_alignment, _type) _type __attribute__((aligned(_alignment))) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */ +#define PIPE_ALIGN_VAR(_alignment) __attribute__((aligned(_alignment))) + #if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) -#define ALIGN_STACK __attribute__((force_align_arg_pointer)) +#define PIPE_ALIGN_STACK __attribute__((force_align_arg_pointer)) #else -#define ALIGN_STACK +#define PIPE_ALIGN_STACK #endif + +#elif defined(_MSC_VER) + +/* See http://msdn.microsoft.com/en-us/library/83ythb65.aspx */ +#define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type +#define PIPE_ALIGN_VAR(_alignment) __declspec(align(_alignment)) + +#define PIPE_ALIGN_STACK + +#elif defined(SWIG) + +#define PIPE_ALIGN_TYPE(_alignment, _type) _type +#define PIPE_ALIGN_VAR(_alignment) + +#define 
PIPE_ALIGN_STACK + #else -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] -#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned) -#define ALIGN16_ATTRIB -#define ALIGN8_ATTRIB -#define ALIGN_STACK + +#error "Unsupported compiler" + #endif diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 064605a4a05..c5928dde471 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -115,8 +115,10 @@ #endif +#if !defined(PIPE_OS_EMBEDDED) + /* - * Operating system family. + * Auto-detect the operating system family. * * See subsystem below for a more fine-grained distinction. */ @@ -164,7 +166,7 @@ #endif /* - * Subsystem. + * Try to auto-detect the subsystem. * * NOTE: There is no way to auto-detect most of these. */ @@ -191,5 +193,7 @@ #endif #endif /* PIPE_OS_WINDOWS */ +#endif /* !PIPE_OS_EMBEDDED */ + #endif /* P_CONFIG_H_ */ diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index d2f8085b421..f1e6a60e041 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -69,6 +69,22 @@ struct pipe_context { unsigned indexSize, unsigned mode, unsigned start, unsigned count); + void (*draw_arrays_instanced)(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + void (*draw_elements_instanced)(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + /* XXX: this is (probably) a temporary entrypoint, as the range * information should be available from the vertex_buffer state. 
* Using this to quickly evaluate a specialized path in the draw @@ -87,7 +103,7 @@ struct pipe_context { /** * Predicate subsequent rendering on occlusion query result * \param query the query predicate, or NULL if no predicate - * \param mode one of PIPE_COND_RENDER_x + * \param mode one of PIPE_RENDER_COND_x */ void (*render_condition)( struct pipe_context *pipe, struct pipe_query *query, @@ -106,6 +122,11 @@ struct pipe_context { void (*begin_query)(struct pipe_context *pipe, struct pipe_query *q); void (*end_query)(struct pipe_context *pipe, struct pipe_query *q); + /** + * Get results of a query. + * \param wait if true, this query will block until the result is ready + * \return TRUE if results are ready, FALSE otherwise + */ boolean (*get_query_result)(struct pipe_context *pipe, struct pipe_query *q, boolean wait, @@ -170,7 +191,7 @@ struct pipe_context { void (*set_constant_buffer)( struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf ); + struct pipe_buffer *buf ); void (*set_framebuffer_state)( struct pipe_context *, const struct pipe_framebuffer_state * ); @@ -255,30 +276,30 @@ struct pipe_context { /** * Check whether a texture is referenced by an unflushed hw command. - * The state-tracker uses this function to optimize away unnecessary - * flushes. It is safe (but wasteful) to always return. + * The state-tracker uses this function to avoid unnecessary flushes. + * It is safe (but wasteful) to always return * PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE. - * \param pipe The pipe context whose unflushed hw commands will be - * checked. - * \param level mipmap level. + * \param pipe context whose unflushed hw commands will be checked. * \param texture texture to check. * \param face cubemap face. Use 0 for non-cubemap texture. + * \param level mipmap level. 
+ * \return mask of PIPE_REFERENCED_FOR_READ/WRITE or PIPE_UNREFERENCED */ - unsigned int (*is_texture_referenced) (struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level); + unsigned int (*is_texture_referenced)(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, unsigned level); /** * Check whether a buffer is referenced by an unflushed hw command. - * The state-tracker uses this function to optimize away unnecessary - * flushes. It is safe (but wasteful) to always return + * The state-tracker uses this function to avoid unnecessary flushes. + * It is safe (but wasteful) to always return * PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE. - * \param pipe The pipe context whose unflushed hw commands will be - * checked. - * \param buf Buffer to check. + * \param pipe context whose unflushed hw commands will be checked. + * \param buf buffer to check. + * \return mask of PIPE_REFERENCED_FOR_READ/WRITE or PIPE_UNREFERENCED */ - unsigned int (*is_buffer_referenced) (struct pipe_context *pipe, - struct pipe_buffer *buf); + unsigned int (*is_buffer_referenced)(struct pipe_context *pipe, + struct pipe_buffer *buf); }; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index a85a1701536..5cebd43ace2 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -376,7 +376,7 @@ enum pipe_transfer_usage { #define PIPE_CAP_NPOT_TEXTURES 2 #define PIPE_CAP_TWO_SIDED_STENCIL 3 #define PIPE_CAP_GLSL 4 /* XXX need something better */ -#define PIPE_CAP_S3TC 5 /* XXX: deprecated; cap determined via supported sampler formats */ +#define PIPE_CAP_DUAL_SOURCE_BLEND 5 #define PIPE_CAP_ANISOTROPIC_FILTER 6 #define PIPE_CAP_POINT_SPRITE 7 #define PIPE_CAP_MAX_RENDER_TARGETS 8 @@ -404,6 +404,14 @@ enum pipe_transfer_usage { #define PIPE_CAP_MAX_PREDICATE_REGISTERS 30 #define PIPE_CAP_MAX_COMBINED_SAMPLERS 31 /*< Maximum texture image units accessible 
from vertex and fragment shaders combined */ +#define PIPE_CAP_MAX_CONST_BUFFERS 32 +#define PIPE_CAP_MAX_CONST_BUFFER_SIZE 33 /*< In bytes */ +#define PIPE_CAP_INDEP_BLEND_ENABLE 34 /*< blend enables and write masks per rendertarget */ +#define PIPE_CAP_INDEP_BLEND_FUNC 35 /*< different blend funcs per rendertarget */ +#define PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT 36 +#define PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT 37 +#define PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER 38 +#define PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER 39 /** diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 6bfff1cc59c..2894e13e7df 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -31,10 +31,6 @@ #include "p_compiler.h" -/* FIXME: remove these header dependencies */ -#include "util/u_debug.h" -#include "util/u_string.h" - #ifdef __cplusplus extern "C" { #endif diff --git a/src/gallium/include/pipe/p_refcnt.h b/src/gallium/include/pipe/p_refcnt.h deleted file mode 100644 index c1c7415e023..00000000000 --- a/src/gallium/include/pipe/p_refcnt.h +++ /dev/null @@ -1,95 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef P_REFCNT_H -#define P_REFCNT_H - - -#include "p_defines.h" -#include "p_atomic.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -struct pipe_reference -{ - struct pipe_atomic count; -}; - - -static INLINE void -pipe_reference_init(struct pipe_reference *reference, unsigned count) -{ - p_atomic_set(&reference->count, count); -} - - -static INLINE boolean -pipe_is_referenced(struct pipe_reference *reference) -{ - return p_atomic_read(&reference->count) != 0; -} - - -/** - * Update reference counting. - * The old thing pointed to, if any, will be unreferenced. - * Both 'ptr' and 'reference' may be NULL. - * \return TRUE if the object's refcount hits zero and should be destroyed. 
- */ -static INLINE boolean -pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference) -{ - boolean destroy = FALSE; - - if(ptr != reference) { - /* bump the reference.count first */ - if (reference) { - assert(pipe_is_referenced(reference)); - p_atomic_inc(&reference->count); - } - - if (ptr) { - assert(pipe_is_referenced(ptr)); - if (p_atomic_dec_zero(&ptr->count)) { - destroy = TRUE; - } - } - } - - return destroy; -} - - -#ifdef __cplusplus -} -#endif - -#endif /* P_REFCNT_H */ diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index b8e001a6b01..48625bf3127 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -86,6 +86,9 @@ struct pipe_screen { */ float (*get_paramf)( struct pipe_screen *, int param ); + struct pipe_context * (*context_create)( struct pipe_screen *, + void *priv ); + /** * Check if the given pipe_format is supported as a texture or * drawing surface. diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 550e2abc32a..c5c480f1f0e 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -1,7 +1,7 @@ /************************************************************************** * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -102,6 +102,11 @@ enum tgsi_file_type { #define TGSI_INTERPOLATE_PERSPECTIVE 2 #define TGSI_INTERPOLATE_COUNT 3 +#define TGSI_CYLINDRICAL_WRAP_X (1 << 0) +#define TGSI_CYLINDRICAL_WRAP_Y (1 << 1) +#define TGSI_CYLINDRICAL_WRAP_Z (1 << 2) +#define TGSI_CYLINDRICAL_WRAP_W (1 << 3) + struct tgsi_declaration { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_DECLARATION */ @@ -109,10 +114,11 @@ struct tgsi_declaration unsigned File : 4; /**< one of TGSI_FILE_x */ unsigned UsageMask : 4; /**< bitmask of TGSI_WRITEMASK_x flags */ unsigned Interpolate : 4; /**< one of TGSI_INTERPOLATE_x */ + unsigned Dimension : 1; /**< any extra dimension info? */ unsigned Semantic : 1; /**< BOOL, any semantic info? */ unsigned Centroid : 1; /**< centroid sampling? */ unsigned Invariant : 1; /**< invariant optimization? */ - unsigned Padding : 5; + unsigned CylindricalWrap:4; /**< TGSI_CYLINDRICAL_WRAP_x flags */ }; struct tgsi_declaration_range @@ -121,17 +127,24 @@ struct tgsi_declaration_range unsigned Last : 16; /**< UINT */ }; -#define TGSI_SEMANTIC_POSITION 0 -#define TGSI_SEMANTIC_COLOR 1 -#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ -#define TGSI_SEMANTIC_FOG 3 -#define TGSI_SEMANTIC_PSIZE 4 -#define TGSI_SEMANTIC_GENERIC 5 -#define TGSI_SEMANTIC_NORMAL 6 -#define TGSI_SEMANTIC_FACE 7 -#define TGSI_SEMANTIC_EDGEFLAG 8 -#define TGSI_SEMANTIC_PRIMID 9 -#define TGSI_SEMANTIC_COUNT 10 /**< number of semantic values */ +struct tgsi_declaration_dimension +{ + unsigned Index2D:16; /**< UINT */ + unsigned Padding:16; +}; + +#define TGSI_SEMANTIC_POSITION 0 +#define TGSI_SEMANTIC_COLOR 1 +#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ +#define TGSI_SEMANTIC_FOG 3 +#define TGSI_SEMANTIC_PSIZE 4 +#define TGSI_SEMANTIC_GENERIC 5 +#define TGSI_SEMANTIC_NORMAL 6 +#define TGSI_SEMANTIC_FACE 7 +#define TGSI_SEMANTIC_EDGEFLAG 8 +#define TGSI_SEMANTIC_PRIMID 9 +#define TGSI_SEMANTIC_INSTANCEID 10 +#define 
TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */ struct tgsi_declaration_semantic { @@ -162,7 +175,9 @@ union tgsi_immediate_data #define TGSI_PROPERTY_GS_INPUT_PRIM 0 #define TGSI_PROPERTY_GS_OUTPUT_PRIM 1 #define TGSI_PROPERTY_GS_MAX_VERTICES 2 -#define TGSI_PROPERTY_COUNT 3 +#define TGSI_PROPERTY_FS_COORD_ORIGIN 3 +#define TGSI_PROPERTY_FS_COORD_PIXEL_CENTER 4 +#define TGSI_PROPERTY_COUNT 5 struct tgsi_property { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */ @@ -171,6 +186,12 @@ struct tgsi_property { unsigned Padding : 12; }; +#define TGSI_FS_COORD_ORIGIN_UPPER_LEFT 0 +#define TGSI_FS_COORD_ORIGIN_LOWER_LEFT 1 + +#define TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER 0 +#define TGSI_FS_COORD_PIXEL_CENTER_INTEGER 1 + struct tgsi_property_data { unsigned Data; }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 4387b92be20..68369570b95 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -43,7 +43,6 @@ #include "p_compiler.h" #include "p_defines.h" #include "p_format.h" -#include "p_refcnt.h" #include "p_screen.h" @@ -58,7 +57,7 @@ extern "C" { #define PIPE_MAX_ATTRIBS 32 #define PIPE_MAX_CLIP_PLANES 6 #define PIPE_MAX_COLOR_BUFS 8 -#define PIPE_MAX_CONSTANT 32 +#define PIPE_MAX_CONSTANT_BUFFERS 32 #define PIPE_MAX_SAMPLERS 16 #define PIPE_MAX_VERTEX_SAMPLERS 16 #define PIPE_MAX_SHADER_INPUTS 16 @@ -66,8 +65,10 @@ extern "C" { #define PIPE_MAX_TEXTURE_LEVELS 16 -/* fwd decls */ -struct pipe_surface; +struct pipe_reference +{ + int32_t count; /* atomic */ +}; /** @@ -177,15 +178,6 @@ struct pipe_clip_state }; -/** - * Constants for vertex/fragment shaders - */ -struct pipe_constant_buffer -{ - struct pipe_buffer *buffer; -}; - - struct pipe_shader_state { const struct tgsi_token *tokens; @@ -229,7 +221,7 @@ struct pipe_depth_stencil_alpha_state }; -struct pipe_blend_state +struct pipe_rt_blend_state { unsigned blend_enable:1; @@ -241,11 +233,16 @@ struct pipe_blend_state 
unsigned alpha_src_factor:5; /**< PIPE_BLENDFACTOR_x */ unsigned alpha_dst_factor:5; /**< PIPE_BLENDFACTOR_x */ + unsigned colormask:4; /**< bitmask of PIPE_MASK_R/G/B/A */ +}; + +struct pipe_blend_state +{ + unsigned independent_blend_enable:1; unsigned logicop_enable:1; unsigned logicop_func:4; /**< PIPE_LOGICOP_x */ - - unsigned colormask:4; /**< bitmask of PIPE_MASK_R/G/B/A */ unsigned dither:1; + struct pipe_rt_blend_state rt[PIPE_MAX_COLOR_BUFS]; }; @@ -281,7 +278,6 @@ struct pipe_sampler_state unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */ unsigned compare_func:3; /**< PIPE_FUNC_x */ unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */ - unsigned prefilter:4; /**< Wierd sampling state exposed by some api's */ float lod_bias; /**< LOD/lambda bias */ float min_lod, max_lod; /**< LOD clamp range, after bias */ float border_color[4]; @@ -375,6 +371,11 @@ struct pipe_vertex_element /** Offset of this attribute, in bytes, from the start of the vertex */ unsigned src_offset; + /** Instance data rate divisor. 0 means this is per-vertex data, + * n means per-instance data used for n consecutive instances (n > 0). + */ + unsigned instance_divisor; + /** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does * this attribute live in? 
*/ @@ -385,38 +386,6 @@ struct pipe_vertex_element }; -/* Reference counting helper functions */ -static INLINE void -pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf) -{ - struct pipe_buffer *old_buf = *ptr; - - if (pipe_reference(&(*ptr)->reference, &buf->reference)) - old_buf->screen->buffer_destroy(old_buf); - *ptr = buf; -} - -static INLINE void -pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) -{ - struct pipe_surface *old_surf = *ptr; - - if (pipe_reference(&(*ptr)->reference, &surf->reference)) - old_surf->texture->screen->tex_surface_destroy(old_surf); - *ptr = surf; -} - -static INLINE void -pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *tex) -{ - struct pipe_texture *old_tex = *ptr; - - if (pipe_reference(&(*ptr)->reference, &tex->reference)) - old_tex->screen->texture_destroy(old_tex); - *ptr = tex; -} - - #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index b85f01c2b02..77e22d0a566 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -30,12 +30,12 @@ /* u_reduce_video_profile() needs these */ #include <pipe/p_compiler.h> -#include <util/u_debug.h> #include <pipe/p_defines.h> #include <pipe/p_format.h> -#include <pipe/p_refcnt.h> +#include <pipe/p_state.h> #include <pipe/p_screen.h> +#include <util/u_inlines.h> #ifdef __cplusplus extern "C" { diff --git a/src/gallium/include/state_tracker/drm_api.h b/src/gallium/include/state_tracker/drm_api.h index 4d1259e1ee7..e9fa9b4d2a3 100644 --- a/src/gallium/include/state_tracker/drm_api.h +++ b/src/gallium/include/state_tracker/drm_api.h @@ -28,14 +28,19 @@ struct drm_create_screen_arg { struct drm_api { + const char *name; + + /** + * Kernel driver name, as accepted by drmOpenByName. 
+ */ + const char *driver_name; + /** * Special buffer functions */ /*@{*/ struct pipe_screen* (*create_screen)(struct drm_api *api, int drm_fd, struct drm_create_screen_arg *arg); - struct pipe_context* (*create_context)(struct drm_api *api, - struct pipe_screen *screen); /*@}*/ /** diff --git a/src/gallium/state_trackers/Makefile b/src/gallium/state_trackers/Makefile index 265ca468c2d..0900efc664f 100644 --- a/src/gallium/state_trackers/Makefile +++ b/src/gallium/state_trackers/Makefile @@ -21,5 +21,9 @@ clean: rm -f `find . -name depend` -# Dummy install target install: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) $@) || exit 1 ; \ + fi \ + done diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c index f2e5f3fb23c..5033c3c85b8 100644 --- a/src/gallium/state_trackers/dri/dri_context.c +++ b/src/gallium/state_trackers/dri/dri_context.c @@ -69,14 +69,12 @@ dri_create_context(const __GLcontextModes * visual, driParseConfigFiles(&ctx->optionCache, &screen->optionCache, sPriv->myNum, "dri"); - ctx->pipe = screen->api->create_context(screen->api, screen->pipe_screen); + ctx->pipe = screen->pipe_screen->context_create( screen->pipe_screen, + ctx ); if (ctx->pipe == NULL) goto fail; - /* used in dri_flush_frontbuffer */ - ctx->pipe->priv = ctx; - ctx->st = st_create_context(ctx->pipe, visual, st_share); if (ctx->st == NULL) goto fail; @@ -101,6 +99,12 @@ dri_destroy_context(__DRIcontext * cPriv) { struct dri_context *ctx = dri_context(cPriv); + /* note: we are freeing values and nothing more because + * driParseConfigFiles allocated values only - the rest + * is owned by screen optionCache. 
+ */ + FREE(ctx->optionCache.values); + /* No particular reason to wait for command completion before * destroying a context, but it is probably worthwhile flushing it * to avoid having to add code elsewhere to cope with flushing a diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index f131e77ac5e..f7ed6605bf8 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -35,7 +35,6 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" -#include "pipe/p_inlines.h" #include "main/mtypes.h" #include "main/renderbuffer.h" #include "state_tracker/drm_api.h" @@ -47,6 +46,7 @@ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_rect.h" +#include "util/u_inlines.h" static struct pipe_surface * dri_surface_from_handle(struct drm_api *api, @@ -123,11 +123,12 @@ dri_get_buffers(__DRIdrawable * dPriv) struct dri_drawable *drawable = dri_drawable(dPriv); struct pipe_surface *surface = NULL; - struct pipe_screen *screen = dri_screen(drawable->sPriv)->pipe_screen; + struct dri_screen *st_screen = dri_screen(drawable->sPriv); + struct pipe_screen *screen = st_screen->pipe_screen; __DRIbuffer *buffers = NULL; __DRIscreen *dri_screen = drawable->sPriv; __DRIdrawable *dri_drawable = drawable->dPriv; - struct drm_api *api = ((struct dri_screen*)(dri_screen->private))->api; + struct drm_api *api = st_screen->api; boolean have_depth = FALSE; int i, count; @@ -180,6 +181,9 @@ dri_get_buffers(__DRIdrawable * dPriv) switch (buffers[i].attachment) { case __DRI_BUFFER_FRONT_LEFT: + if (!st_screen->auto_fake_front) + continue; + /* fallthrough */ case __DRI_BUFFER_FAKE_FRONT_LEFT: index = ST_SURFACE_FRONT_LEFT; format = drawable->color_format; @@ -372,7 +376,8 @@ dri_create_buffer(__DRIscreen * sPriv, /* TODO incase of double buffer visual, delay fake creation */ i = 0; drawable->attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - + if (!screen->auto_fake_front) + 
drawable->attachments[i++] = __DRI_BUFFER_FAKE_FRONT_LEFT; if (visual->doubleBufferMode) drawable->attachments[i++] = __DRI_BUFFER_BACK_LEFT; if (visual->depthBits && visual->stencilBits) diff --git a/src/gallium/state_trackers/dri/dri_extensions.c b/src/gallium/state_trackers/dri/dri_extensions.c index 8b014a2a8b8..1259813a412 100644 --- a/src/gallium/state_trackers/dri/dri_extensions.c +++ b/src/gallium/state_trackers/dri/dri_extensions.c @@ -50,6 +50,7 @@ #define need_GL_EXT_blend_func_separate #define need_GL_EXT_blend_minmax #define need_GL_EXT_cull_vertex +#define need_GL_EXT_draw_buffers2 #define need_GL_EXT_fog_coord #define need_GL_EXT_framebuffer_object #define need_GL_EXT_multi_draw_arrays @@ -98,6 +99,7 @@ static const struct dri_extension card_extensions[] = { {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, {"GL_EXT_blend_subtract", NULL}, {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions}, + {"GL_EXT_draw_buffers2", GL_EXT_draw_buffers2_functions}, {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions}, {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions}, {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, @@ -130,6 +132,9 @@ dri_init_extensions(struct dri_context *ctx) /* The card_extensions list should be pruned according to the * capabilities of the pipe_screen. This is actually something * that can/should be done inside st_create_context(). + * XXX Not pruning is very bogus. Always all these extensions above + * will be advertized, regardless what st_init_extensions + * (which depends on the pipe cap bits) does. 
*/ driInitExtensions(ctx->st->ctx, card_extensions, GL_TRUE); } diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 793db087ee1..2052867309c 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -37,14 +37,12 @@ #include "dri_context.h" #include "dri_drawable.h" -#include "pipe/p_context.h" #include "pipe/p_screen.h" -#include "pipe/p_inlines.h" #include "pipe/p_format.h" #include "state_tracker/drm_api.h" #include "state_tracker/dri1_api.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_cb_fbo.h" + +#include "util/u_debug.h" PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE @@ -83,7 +81,7 @@ dri_fill_in_modes(struct dri_screen *screen, unsigned num_modes; uint8_t depth_bits_array[5]; uint8_t stencil_bits_array[5]; - uint8_t msaa_samples_array[1]; + uint8_t msaa_samples_array[2]; unsigned depth_buffer_factor; unsigned back_buffer_factor; unsigned msaa_samples_factor; @@ -147,8 +145,9 @@ dri_fill_in_modes(struct dri_screen *screen, } msaa_samples_array[0] = 0; + msaa_samples_array[1] = 4; back_buffer_factor = 3; - msaa_samples_factor = 1; + msaa_samples_factor = 2; num_modes = depth_buffer_factor * back_buffer_factor * msaa_samples_factor * 4; @@ -158,7 +157,7 @@ dri_fill_in_modes(struct dri_screen *screen, depth_bits_array, stencil_bits_array, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, msaa_samples_factor); } else { __DRIconfig **configs_a8r8g8b8 = NULL; __DRIconfig **configs_x8r8g8b8 = NULL; @@ -170,7 +169,8 @@ dri_fill_in_modes(struct dri_screen *screen, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, + msaa_samples_factor); if (pf_x8r8g8b8) configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV, depth_bits_array, @@ -178,7 +178,8 @@ dri_fill_in_modes(struct 
dri_screen *screen, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, + msaa_samples_factor); if (configs_a8r8g8b8 && configs_x8r8g8b8) configs = driConcatConfigs(configs_x8r8g8b8, configs_a8r8g8b8); @@ -195,7 +196,7 @@ dri_fill_in_modes(struct dri_screen *screen, return NULL; } - return (const const __DRIconfig **)configs; + return (const __DRIconfig **)configs; } /** @@ -289,6 +290,8 @@ dri_init_screen2(__DRIscreen * sPriv) { struct dri_screen *screen; struct drm_create_screen_arg arg; + const __DRIdri2LoaderExtension *dri2_ext = + sPriv->dri2.loader; screen = CALLOC_STRUCT(dri_screen); if (!screen) @@ -314,6 +317,9 @@ dri_init_screen2(__DRIscreen * sPriv) driParseOptionInfo(&screen->optionCache, __driConfigOptions, __driNConfigOptions); + screen->auto_fake_front = dri2_ext->base.version >= 3 && + dri2_ext->getBuffersWithFormat != NULL; + return dri_fill_in_modes(screen, 32); fail: return NULL; @@ -323,8 +329,18 @@ static void dri_destroy_screen(__DRIscreen * sPriv) { struct dri_screen *screen = dri_screen(sPriv); + int i; screen->pipe_screen->destroy(screen->pipe_screen); + + for (i = 0; i < (1 << screen->optionCache.tableSize); ++i) { + FREE(screen->optionCache.info[i].name); + FREE(screen->optionCache.info[i].ranges); + } + + FREE(screen->optionCache.info); + FREE(screen->optionCache.values); + FREE(screen); sPriv->private = NULL; } diff --git a/src/gallium/state_trackers/dri/dri_screen.h b/src/gallium/state_trackers/dri/dri_screen.h index 03387a0e813..75a0ee4250e 100644 --- a/src/gallium/state_trackers/dri/dri_screen.h +++ b/src/gallium/state_trackers/dri/dri_screen.h @@ -59,6 +59,7 @@ struct dri_screen struct pipe_screen *pipe_screen; boolean d_depth_bits_last; boolean sd_depth_bits_last; + boolean auto_fake_front; }; /** cast wrapper */ diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile index e825aa718b6..794785006f5 100644 --- 
a/src/gallium/state_trackers/egl/Makefile +++ b/src/gallium/state_trackers/egl/Makefile @@ -1,19 +1,73 @@ TOP = ../../../.. include $(TOP)/configs/current -LIBNAME = egldrm - -LIBRARY_INCLUDES = \ +common_INCLUDES = \ + -I. \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/mesa/drivers/dri/common \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include \ -I$(TOP)/src/egl/main \ + -I$(TOP)/include + +common_SOURCES = $(wildcard common/*.c) +common_OBJECTS = $(common_SOURCES:.c=.o) + + +x11_INCLUDES = \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/glx \ + -I$(TOP)/src/mesa \ $(shell pkg-config --cflags-only-I libdrm) +x11_SOURCES = $(wildcard x11/*.c) \ + $(TOP)/src/glx/dri2.c +x11_OBJECTS = $(x11_SOURCES:.c=.o) + + +kms_INCLUDES = $(shell pkg-config --cflags-only-I libdrm) +kms_SOURCES = $(wildcard kms/*.c) +kms_OBJECTS = $(kms_SOURCES:.c=.o) + + +ALL_INCLUDES = $(common_INCLUDES) $(x11_INCLUDES) $(kms_INCLUDES) +ALL_SOURCES = $(common_SOURCES) $(x11_SOURCES) $(kms_SOURCES) +ALL_OBJECTS = $(common_OBJECTS) $(x11_OBJECTS) $(kms_OBJECTS) + +##### TARGETS ##### + +EGL_DISPLAYS_MODS = $(foreach dpy, $(EGL_DISPLAYS), libegl$(dpy).a) + +default: depend $(EGL_DISPLAYS_MODS) + + +libeglx11.a: $(x11_OBJECTS) $(common_OBJECTS) Makefile + $(MKLIB) -o eglx11 -static $(x11_OBJECTS) $(common_OBJECTS) + +libeglkms.a: $(kms_OBJECTS) $(common_OBJECTS) Makefile + $(MKLIB) -o eglkms -static $(kms_OBJECTS) $(common_OBJECTS) + +depend: + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(ALL_INCLUDES) $(ALL_SOURCES) 2> /dev/null + +clean: + rm -f $(ALL_OBJECTS) + rm -f $(EGL_DISPLAYS_MODS) + rm -f depend depend.bak + +# Dummy target +install: + @echo -n "" + +##### RULES ##### + +$(common_OBJECTS): %.o: %.c + $(CC) -c $(common_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ -C_SOURCES = $(wildcard ./*.c) +$(x11_OBJECTS): %.o: %.c + $(CC) -c $(common_INCLUDES) $(x11_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ +$(kms_OBJECTS): %.o: %.c + $(CC) -c $(common_INCLUDES) 
$(kms_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ -include ../../Makefile.template +sinclude depend diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c new file mode 100644 index 00000000000..80dd1269955 --- /dev/null +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -0,0 +1,1351 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_rect.h" +#include "util/u_inlines.h" +#include "egldriver.h" +#include "eglcurrent.h" +#include "eglconfigutil.h" +#include "egllog.h" + +#include "native.h" +#include "egl_g3d.h" +#include "egl_st.h" + +/** + * Validate the draw/read surfaces of the context. 
+ */ +static void +egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct pipe_screen *screen = gdpy->native->screen; + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + const uint st_att_map[NUM_NATIVE_ATTACHMENTS] = { + ST_SURFACE_FRONT_LEFT, + ST_SURFACE_BACK_LEFT, + ST_SURFACE_FRONT_RIGHT, + ST_SURFACE_BACK_RIGHT, + }; + EGLint num_surfaces, s; + + /* validate draw and/or read buffers */ + num_surfaces = (gctx->base.ReadSurface == gctx->base.DrawSurface) ? 1 : 2; + for (s = 0; s < num_surfaces; s++) { + struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; + struct egl_g3d_surface *gsurf; + struct egl_g3d_buffer *gbuf; + EGLint att; + + if (s == 0) { + gsurf = egl_g3d_surface(gctx->base.DrawSurface); + gbuf = &gctx->draw; + } + else { + gsurf = egl_g3d_surface(gctx->base.ReadSurface); + gbuf = &gctx->read; + } + + if (!gctx->force_validate) { + unsigned int seq_num; + + gsurf->native->validate(gsurf->native, gbuf->attachment_mask, + &seq_num, NULL, NULL, NULL); + /* skip validation */ + if (gsurf->sequence_number == seq_num) + continue; + } + + pipe_surface_reference(&gsurf->render_surface, NULL); + memset(textures, 0, sizeof(textures)); + + gsurf->native->validate(gsurf->native, gbuf->attachment_mask, + &gsurf->sequence_number, textures, + &gsurf->base.Width, &gsurf->base.Height); + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + struct pipe_texture *pt = textures[att]; + struct pipe_surface *ps; + + if (native_attachment_mask_test(gbuf->attachment_mask, att) && pt) { + ps = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + gctx->stapi->st_set_framebuffer_surface(gbuf->st_fb, + st_att_map[att], ps); + + if (gsurf->render_att == att) + pipe_surface_reference(&gsurf->render_surface, ps); + + pipe_surface_reference(&ps, NULL); + pipe_texture_reference(&pt, NULL); + } + } + + 
gctx->stapi->st_resize_framebuffer(gbuf->st_fb, + gsurf->base.Width, gsurf->base.Height); + } + + gctx->force_validate = EGL_FALSE; + +} + +/** + * Create a st_framebuffer. + */ +static struct st_framebuffer * +create_framebuffer(_EGLContext *ctx, _EGLSurface *surf) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + struct egl_g3d_config *gconf = egl_g3d_config(gsurf->base.Config); + + return gctx->stapi->st_create_framebuffer(&gconf->native->mode, + gconf->native->color_format, gconf->native->depth_format, + gconf->native->stencil_format, + gsurf->base.Width, gsurf->base.Height, &gsurf->base); +} + +/** + * Update the attachments of draw/read surfaces. + */ +static void +egl_g3d_route_context(_EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + EGLint s; + + /* route draw and read buffers' attachments */ + for (s = 0; s < 2; s++) { + struct egl_g3d_surface *gsurf; + struct egl_g3d_buffer *gbuf; + + if (s == 0) { + gsurf = egl_g3d_surface(gctx->base.DrawSurface); + gbuf = &gctx->draw; + } + else { + gsurf = egl_g3d_surface(gctx->base.ReadSurface); + gbuf = &gctx->read; + } + + gbuf->attachment_mask = (1 << gsurf->render_att); + + /* FIXME OpenGL defaults to draw the front or back buffer when the + * context is single-buffered or double-buffered respectively. In EGL, + * however, the buffer to be drawn is determined by the surface, instead + * of the context. As a result, rendering to a pixmap surface with a + * double-buffered context does not work as expected. + * + * gctx->stapi->st_draw_front_buffer(gctx->st_ctx, natt == + * NATIVE_ATTACHMENT_FRONT_LEFT); + */ + + /* + * FIXME If the back buffer is asked for here, and the front buffer is + * later needed by the client API (e.g. glDrawBuffer is called to draw + * the front buffer), it will create a new pipe texture and draw there. 
+ * One fix is to ask for both buffers here, but it would be a waste if + * the front buffer is never used. A better fix is to add a callback to + * the pipe screen with context private (just like flush_frontbuffer). + */ + } +} + +/** + * Reallocate the context's framebuffers after draw/read surfaces change. + */ +static EGLBoolean +egl_g3d_realloc_context(_EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + struct egl_g3d_surface *gdraw = egl_g3d_surface(gctx->base.DrawSurface); + struct egl_g3d_surface *gread = egl_g3d_surface(gctx->base.ReadSurface); + + /* unreference the old framebuffers */ + if (gctx->draw.st_fb) { + EGLBoolean is_equal = (gctx->draw.st_fb == gctx->read.st_fb); + void *priv; + + priv = gctx->stapi->st_framebuffer_private(gctx->draw.st_fb); + if (!gdraw || priv != (void *) &gdraw->base) { + gctx->stapi->st_unreference_framebuffer(gctx->draw.st_fb); + gctx->draw.st_fb = NULL; + gctx->draw.attachment_mask = 0x0; + } + + if (is_equal) { + gctx->read.st_fb = NULL; + gctx->draw.attachment_mask = 0x0; + } + else { + priv = gctx->stapi->st_framebuffer_private(gctx->read.st_fb); + if (!gread || priv != (void *) &gread->base) { + gctx->stapi->st_unreference_framebuffer(gctx->read.st_fb); + gctx->read.st_fb = NULL; + gctx->draw.attachment_mask = 0x0; + } + } + } + + if (!gdraw) + return EGL_TRUE; + + /* create the draw fb */ + if (!gctx->draw.st_fb) { + gctx->draw.st_fb = create_framebuffer(&gctx->base, &gdraw->base); + if (!gctx->draw.st_fb) + return EGL_FALSE; + } + + /* create the read fb */ + if (!gctx->read.st_fb) { + if (gread != gdraw) { + gctx->read.st_fb = create_framebuffer(&gctx->base, &gread->base); + if (!gctx->read.st_fb) { + gctx->stapi->st_unreference_framebuffer(gctx->draw.st_fb); + gctx->draw.st_fb = NULL; + return EGL_FALSE; + } + } + else { + /* there is no st_reference_framebuffer... 
*/ + gctx->read.st_fb = gctx->draw.st_fb; + } + } + + egl_g3d_route_context(dpy, &gctx->base); + gctx->force_validate = EGL_TRUE; + + return EGL_TRUE; +} + +/** + * Return the state tracker for the given context. + */ +static const struct egl_g3d_st * +egl_g3d_choose_st(_EGLDriver *drv, _EGLContext *ctx) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + const struct egl_g3d_st *stapi; + EGLint idx = -1; + + switch (ctx->ClientAPI) { + case EGL_OPENGL_ES_API: + switch (ctx->ClientVersion) { + case 1: + idx = EGL_G3D_ST_OPENGL_ES; + break; + case 2: + idx = EGL_G3D_ST_OPENGL_ES2; + break; + default: + _eglLog(_EGL_WARNING, "unknown client version %d", + ctx->ClientVersion); + break; + } + break; + case EGL_OPENVG_API: + idx = EGL_G3D_ST_OPENVG; + break; + case EGL_OPENGL_API: + idx = EGL_G3D_ST_OPENGL; + break; + default: + _eglLog(_EGL_WARNING, "unknown client API 0x%04x", ctx->ClientAPI); + break; + } + + stapi = (idx >= 0) ? gdrv->stapis[idx] : NULL; + return stapi; +} + +/** + * Initialize the state trackers. + */ +static void +egl_g3d_init_st(_EGLDriver *drv) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + EGLint i; + + /* already initialized */ + if (gdrv->api_mask) + return; + + for (i = 0; i < NUM_EGL_G3D_STS; i++) { + gdrv->stapis[i] = egl_g3d_get_st(i); + if (gdrv->stapis[i]) + gdrv->api_mask |= gdrv->stapis[i]->api_bit; + } + + if (gdrv->api_mask) + _eglLog(_EGL_DEBUG, "Driver API mask: 0x%x", gdrv->api_mask); + else + _eglLog(_EGL_WARNING, "No supported client API"); +} + +/** + * Get the probe object of the display. + * + * Note that this function may be called before the display is initialized. 
+ */ +static struct native_probe * +egl_g3d_get_probe(_EGLDriver *drv, _EGLDisplay *dpy) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + struct native_probe *nprobe; + + nprobe = (struct native_probe *) _eglGetProbeCache(gdrv->probe_key); + if (!nprobe || nprobe->display != dpy->NativeDisplay) { /* nothing cached, or cached for another native display */ + if (nprobe) + nprobe->destroy(nprobe); + nprobe = native_create_probe(dpy->NativeDisplay); + _eglSetProbeCache(gdrv->probe_key, (void *) nprobe); /* whatever native_create_probe() returned is cached */ + } + + return nprobe; +} + +/** + * Destroy the probe object of the display. The display may be NULL. + * + * With a NULL display the cached probe is destroyed unconditionally; + * otherwise it is destroyed only when it was created for the display's + * native display. The cache entry is cleared whenever the probe is + * destroyed. + * + * Note that this function may be called before the display is initialized. + */ +static void +egl_g3d_destroy_probe(_EGLDriver *drv, _EGLDisplay *dpy) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + struct native_probe *nprobe; + + nprobe = (struct native_probe *) _eglGetProbeCache(gdrv->probe_key); + if (nprobe && (!dpy || nprobe->display == dpy->NativeDisplay)) { + nprobe->destroy(nprobe); + _eglSetProbeCache(gdrv->probe_key, NULL); + } +} + +/** + * Return an API mask that consists of the state trackers that supports the + * given mode. + * + * FIXME add st_is_mode_supported()? 
+ */ +static EGLint +get_mode_api_mask(const __GLcontextModes *mode, EGLint api_mask) +{ + EGLint check; + + /* OpenGL ES 1.x and 2.x are checked together */ + check = EGL_OPENGL_ES_BIT | EGL_OPENGL_ES2_BIT; + if (api_mask & check) { + /* window modes must be double-buffered; this is required by EGL, not by OpenGL ES */ + if (mode->drawableType & GLX_WINDOW_BIT && !mode->doubleBufferMode) + api_mask &= ~check; + } + + check = EGL_OPENVG_BIT; + if (api_mask & check) { + /* the OpenVG (vega) state tracker needs the depth/stencil renderbuffer */ + if (!mode->depthBits && !mode->stencilBits) + api_mask &= ~check; + } + + return api_mask; +} + +#ifdef EGL_MESA_screen_surface +/** + * Add one EGL screen to the display for every native connector that reports + * at least one mode, mirroring the connector's modes into the screen. + * Connectors without modes, and connectors for which the screen cannot be + * allocated, are skipped. + */ +static void +egl_g3d_add_screens(_EGLDriver *drv, _EGLDisplay *dpy) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + const struct native_connector **native_connectors; + EGLint num_connectors, i; + + native_connectors = + gdpy->native->modeset->get_connectors(gdpy->native, &num_connectors, NULL); + if (!num_connectors) { + if (native_connectors) + free(native_connectors); + return; + } + + for (i = 0; i < num_connectors; i++) { + const struct native_connector *nconn = native_connectors[i]; + struct egl_g3d_screen *gscr; + const struct native_mode **native_modes; + EGLint num_modes, j; + + /* TODO support for hotplug */ + native_modes = + gdpy->native->modeset->get_modes(gdpy->native, nconn, &num_modes); + if (!num_modes) { + if (native_modes) + free(native_modes); + continue; + } + + gscr = CALLOC_STRUCT(egl_g3d_screen); + if (!gscr) { + free(native_modes); + continue; + } + + _eglInitScreen(&gscr->base); + + for (j = 0; j < num_modes; j++) { + const struct native_mode *nmode = native_modes[j]; + _EGLMode *mode; + + mode = _eglAddNewMode(&gscr->base, nmode->width, nmode->height, + nmode->refresh_rate, nmode->desc); + if (!mode) + break; /* NOTE(review): the screen is still added below, with fewer modes than native_modes holds */ + /* gscr->native_modes and gscr->base.Modes should be consistent */ + assert(mode == &gscr->base.Modes[j]); + } + + gscr->native = nconn; + gscr->native_modes = native_modes; /* the screen takes over the array; egl_g3d_terminate() frees it */ + + _eglAddScreen(dpy, &gscr->base); + } + + 
free(native_connectors); +} + +#endif /* EGL_MESA_screen_surface */ + +/** + * Add configs to display and return the next config ID. + * + * One config is created for every native config that passes validation. + * IDs are handed out sequentially starting at id, and id only advances when + * a config is actually added, so a return value equal to the id passed in + * means that no config was added. + */ +static EGLint +egl_g3d_add_configs(_EGLDriver *drv, _EGLDisplay *dpy, EGLint id) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + const struct native_config **native_configs; + int num_configs, i; + + native_configs = gdpy->native->get_configs(gdpy->native, + &num_configs); + if (!num_configs) { + if (native_configs) + free(native_configs); + return id; + } + + for (i = 0; i < num_configs; i++) { + EGLint api_mask; + struct egl_g3d_config *gconf; + EGLBoolean valid; + + gconf = CALLOC_STRUCT(egl_g3d_config); + if (!gconf) + continue; /* allocation failed: skip this native config */ + + _eglInitConfig(&gconf->base, dpy, id); + + api_mask = get_mode_api_mask(&native_configs[i]->mode, gdrv->api_mask); + if (!api_mask) { + _eglLog(_EGL_DEBUG, "no state tracker supports config 0x%x", + native_configs[i]->mode.visualID); + } /* only logged here; the empty mask is still passed on below */ + + valid = _eglConfigFromContextModesRec(&gconf->base, + &native_configs[i]->mode, api_mask, api_mask); + if (valid) { +#ifdef EGL_MESA_screen_surface + /* check if scanout surface bit is set */ + if (native_configs[i]->scanout_bit) { + EGLint val = GET_CONFIG_ATTRIB(&gconf->base, EGL_SURFACE_TYPE); + val |= EGL_SCREEN_BIT_MESA; + SET_CONFIG_ATTRIB(&gconf->base, EGL_SURFACE_TYPE, val); + } +#endif + valid = _eglValidateConfig(&gconf->base, EGL_FALSE); + } + if (!valid) { + _eglLog(_EGL_DEBUG, "skip invalid config 0x%x", + native_configs[i]->mode.visualID); + free(gconf); + continue; + } + + gconf->native = native_configs[i]; /* the element is kept; only the array itself is freed below */ + _eglAddConfig(dpy, &gconf->base); + id++; + } + + free(native_configs); + return id; +} + +/** + * Flush the front buffer of the context's draw surface. 
+ */ +static void +egl_g3d_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_surface *surf, void *context_private) +{ + struct egl_g3d_context *gctx = egl_g3d_context(context_private); + struct egl_g3d_surface *gsurf = egl_g3d_surface(gctx->base.DrawSurface); + + if (gsurf) /* nothing to flush when no draw surface is bound */ + gsurf->native->flush_frontbuffer(gsurf->native); +} + +/** + * Re-validate the context. + * + * Installed as the pipe screen's update_buffer callback by + * egl_g3d_initialize(); context_private is the egl_g3d_context. + */ +static void +egl_g3d_update_buffer(struct pipe_screen *screen, void *context_private) +{ + struct egl_g3d_context *gctx = egl_g3d_context(context_private); + + /** + * It is likely that the surface has changed when this function is called. + * Set force_validate to skip an unnecessary check. + */ + gctx->force_validate = EGL_TRUE; + egl_g3d_validate_context(gctx->base.Resource.Display, &gctx->base); +} +/** + * Tear down the display: release its resources, free the screens together + * with their native mode arrays, destroy the native display (if any), and + * free the driver data, leaving dpy->DriverData NULL. Always returns + * EGL_TRUE. + */ +static EGLBoolean +egl_g3d_terminate(_EGLDriver *drv, _EGLDisplay *dpy) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + EGLint i; + + _eglReleaseDisplayResources(drv, dpy); + _eglCleanupDisplay(dpy); + + if (dpy->Screens) { /* NOTE(review): used outside the EGL_MESA_screen_surface guard that wraps egl_g3d_add_screens() */ + for (i = 0; i < dpy->NumScreens; i++) { + struct egl_g3d_screen *gscr = egl_g3d_screen(dpy->Screens[i]); + free(gscr->native_modes); + free(gscr); + } + free(dpy->Screens); + } + + if (gdpy->native) /* NULL when initialization failed before the native display existed */ + gdpy->native->destroy(gdpy->native); + + free(gdpy); + dpy->DriverData = NULL; + + return EGL_TRUE; +} +/** + * Set up the display: create the native display, install the pipe screen + * callbacks, load the state trackers and add the configs (plus the screens + * when the native display supports modesetting). Reports EGL 1.4 through + * major/minor. On failure the partially initialized display is torn down + * with egl_g3d_terminate() and EGL_FALSE is returned. + */ +static EGLBoolean +egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, + EGLint *major, EGLint *minor) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + struct egl_g3d_display *gdpy; + + /* the probe object is unlikely to be needed again */ + egl_g3d_destroy_probe(drv, dpy); + + gdpy = CALLOC_STRUCT(egl_g3d_display); + if (!gdpy) { + _eglError(EGL_BAD_ALLOC, "eglInitialize"); + goto fail; + } + dpy->DriverData = gdpy; + + gdpy->native = native_create_display(dpy->NativeDisplay); + if (!gdpy->native) { + _eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)"); + goto fail; + } + + gdpy->native->screen->flush_frontbuffer = 
egl_g3d_flush_frontbuffer; + gdpy->native->screen->update_buffer = egl_g3d_update_buffer; + + egl_g3d_init_st(&gdrv->base); + dpy->ClientAPIsMask = gdrv->api_mask; + + if (egl_g3d_add_configs(drv, dpy, 1) == 1) { /* the next ID is still the first one: no config was added */ + _eglError(EGL_NOT_INITIALIZED, "eglInitialize(unable to add configs)"); + goto fail; + } + +#ifdef EGL_MESA_screen_surface + /* enable MESA_screen_surface */ + if (gdpy->native->modeset) { + dpy->Extensions.MESA_screen_surface = EGL_TRUE; + egl_g3d_add_screens(drv, dpy); + } +#endif + + *major = 1; + *minor = 4; + + return EGL_TRUE; + +fail: + if (gdpy) /* nothing to tear down when the allocation itself failed */ + egl_g3d_terminate(drv, dpy); + return EGL_FALSE; +} +/** + * Create a context: pick the state tracker that matches the requested + * client API, create a pipe context on the native screen and then the state + * tracker context on top of it, sharing with share's state tracker context + * when share is given. Returns NULL on failure. + * + * NOTE(review): only the allocation failure records an EGL error here; the + * other failure paths return NULL without calling _eglError() themselves. + */ +static _EGLContext * +egl_g3d_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, + _EGLContext *share, const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_context *gshare = egl_g3d_context(share); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_context *gctx; + const __GLcontextModes *mode; + + gctx = CALLOC_STRUCT(egl_g3d_context); + if (!gctx) { + _eglError(EGL_BAD_ALLOC, "eglCreateContext"); + return NULL; + } + + if (!_eglInitContext(&gctx->base, dpy, conf, attribs)) { + free(gctx); + return NULL; + } + + gctx->stapi = egl_g3d_choose_st(drv, &gctx->base); + if (!gctx->stapi) { /* no state tracker for the requested client API */ + free(gctx); + return NULL; + } + + mode = &gconf->native->mode; + + gctx->pipe = gdpy->native->screen->context_create( + gdpy->native->screen, + (void *) &gctx->base); /* presumably handed back as context_private to the screen callbacks */ + + if (!gctx->pipe) { + free(gctx); + return NULL; + } + + gctx->st_ctx = gctx->stapi->st_create_context(gctx->pipe, mode, + (gshare) ? gshare->st_ctx : NULL); + if (!gctx->st_ctx) { + gctx->pipe->destroy(gctx->pipe); + free(gctx); + return NULL; + } + + return &gctx->base; +} + +/** + * Destroy a context. 
+ */ +static void +destroy_context(_EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + + /* FIXME a context might live longer than its display */ + if (!dpy->Initialized) + _eglLog(_EGL_FATAL, "destroy a context with an unitialized display"); + + egl_g3d_realloc_context(dpy, &gctx->base); /* presumably no surface is bound any more, so this only drops the framebuffers */ + /* it will destroy the associated pipe context */ + gctx->stapi->st_destroy_context(gctx->st_ctx); + + free(gctx); +} +/** + * Destroy the context right away unless it is still bound. A bound context + * is left alone here; egl_g3d_make_current() destroys an unlinked context + * once it stops being current. Always returns EGL_TRUE. + */ +static EGLBoolean +egl_g3d_destroy_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) +{ + if (!_eglIsContextBound(ctx)) + destroy_context(dpy, ctx); + return EGL_TRUE; +} +/** + * Ask the native surface for its sequence number and size. The attachment + * mask is empty (0x0) and no texture array is passed, so only + * sequence_number, Width and Height of the surface are filled in. + */ +static EGLBoolean +init_surface_geometry(_EGLSurface *surf) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + + return gsurf->native->validate(gsurf->native, 0x0, + &gsurf->sequence_number, NULL, + &gsurf->base.Width, &gsurf->base.Height); +} +/** + * Create a surface for a native window. Rendering targets the front-left + * attachment when the surface asks for EGL_SINGLE_BUFFER or the config is + * not double-buffered, and the back-left attachment otherwise. Returns NULL + * on failure. + */ +static _EGLSurface * +egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, EGLNativeWindowType win, + const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_surface *gsurf; + + gsurf = CALLOC_STRUCT(egl_g3d_surface); + if (!gsurf) { + _eglError(EGL_BAD_ALLOC, "eglCreateWindowSurface"); + return NULL; + } + + if (!_eglInitSurface(&gsurf->base, dpy, EGL_WINDOW_BIT, conf, attribs)) { + free(gsurf); + return NULL; + } + + gsurf->native = + gdpy->native->create_window_surface(gdpy->native, win, gconf->native); + if (!gsurf->native) { + free(gsurf); + return NULL; + } + + if (!init_surface_geometry(&gsurf->base)) { + gsurf->native->destroy(gsurf->native); + free(gsurf); + return NULL; + } + + gsurf->render_att = (gsurf->base.RenderBuffer == EGL_SINGLE_BUFFER || + !gconf->native->mode.doubleBufferMode) ? 
+ NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; + + return &gsurf->base; +} +/** + * Create a surface for a native pixmap. Pixmap surfaces always render to + * the front-left attachment. Returns NULL on failure. + */ +static _EGLSurface * +egl_g3d_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, EGLNativePixmapType pix, + const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_surface *gsurf; + + gsurf = CALLOC_STRUCT(egl_g3d_surface); + if (!gsurf) { + _eglError(EGL_BAD_ALLOC, "eglCreatePixmapSurface"); + return NULL; + } + + if (!_eglInitSurface(&gsurf->base, dpy, EGL_PIXMAP_BIT, conf, attribs)) { + free(gsurf); + return NULL; + } + + gsurf->native = + gdpy->native->create_pixmap_surface(gdpy->native, pix, gconf->native); + if (!gsurf->native) { + free(gsurf); + return NULL; + } + + if (!init_surface_geometry(&gsurf->base)) { + gsurf->native->destroy(gsurf->native); + free(gsurf); + return NULL; + } + + gsurf->render_att = NATIVE_ATTACHMENT_FRONT_LEFT; + + return &gsurf->base; +} +/** + * Create a pbuffer surface with the width and height that _eglInitSurface() + * stored in the base surface. Rendering targets the back-left attachment + * when the config is double-buffered and the front-left one otherwise. + * Returns NULL on failure. + */ +static _EGLSurface * +egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_surface *gsurf; + + gsurf = CALLOC_STRUCT(egl_g3d_surface); + if (!gsurf) { + _eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface"); + return NULL; + } + + if (!_eglInitSurface(&gsurf->base, dpy, EGL_PBUFFER_BIT, conf, attribs)) { + free(gsurf); + return NULL; + } + + gsurf->native = + gdpy->native->create_pbuffer_surface(gdpy->native, gconf->native, + gsurf->base.Width, gsurf->base.Height); + if (!gsurf->native) { + free(gsurf); + return NULL; + } + + if (!init_surface_geometry(&gsurf->base)) { + gsurf->native->destroy(gsurf->native); + free(gsurf); + return NULL; + } + + gsurf->render_att = (!gconf->native->mode.doubleBufferMode) ? 
+ NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; + + return &gsurf->base; +} + +/** + * Destroy a surface. + * + * Drops the reference on the render surface, destroys the native surface + * and frees the wrapper. + */ +static void +destroy_surface(_EGLDisplay *dpy, _EGLSurface *surf) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + + /* FIXME a surface might live longer than its display */ + if (!dpy->Initialized) + _eglLog(_EGL_FATAL, "destroy a surface with an unitialized display"); + + pipe_surface_reference(&gsurf->render_surface, NULL); + gsurf->native->destroy(gsurf->native); + free(gsurf); +} +/** + * Destroy the surface right away unless it is still bound. A bound surface + * is left alone here; egl_g3d_make_current() destroys an unlinked surface + * once it stops being current. Always returns EGL_TRUE. + */ +static EGLBoolean +egl_g3d_destroy_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf) +{ + if (!_eglIsSurfaceBound(surf)) + destroy_surface(dpy, surf); + return EGL_TRUE; +} +/** + * Make ctx current with the given draw and read surfaces. The previously + * current context is flushed; the new context gets its framebuffers + * reallocated and is made current in its state tracker. Passing no context + * releases the old one. Contexts and surfaces that were unbound by this + * call and are no longer linked are destroyed at the end. + */ +static EGLBoolean +egl_g3d_make_current(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLSurface *draw, _EGLSurface *read, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + struct egl_g3d_surface *gdraw = egl_g3d_surface(draw); + struct egl_g3d_context *old_gctx; + EGLBoolean ok = EGL_TRUE; + + /* bind the new context and return the "orphaned" one */ + if (!_eglBindContext(&ctx, &draw, &read)) + return EGL_FALSE; + old_gctx = egl_g3d_context(ctx); /* ctx now holds the previous context; draw and read are presumably swapped the same way */ + + if (old_gctx) { + /* flush old context */ + old_gctx->stapi->st_flush(old_gctx->st_ctx, + PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); + + /* + * The old context is no longer current, and egl_g3d_realloc_context() + * should be called to destroy the framebuffers. However, it is possible + * that it will be made current again with the same draw/read surfaces. + * It might be better to keep it around. + */ + } + + if (gctx) { + ok = egl_g3d_realloc_context(dpy, &gctx->base); + if (ok) { + ok = gctx->stapi->st_make_current(gctx->st_ctx, + gctx->draw.st_fb, gctx->read.st_fb); + if (ok) { + egl_g3d_validate_context(dpy, &gctx->base); + if (gdraw->base.Type == EGL_WINDOW_BIT) { /* assumes a draw surface always comes with a context -- TODO confirm */ + gctx->base.WindowRenderBuffer = + (gdraw->render_att == NATIVE_ATTACHMENT_FRONT_LEFT) ? 
+ EGL_SINGLE_BUFFER : EGL_BACK_BUFFER; + } + } + } + } + else if (old_gctx) { + ok = old_gctx->stapi->st_make_current(NULL, NULL, NULL); /* nothing new to bind: just release the old context */ + old_gctx->base.WindowRenderBuffer = EGL_NONE; + } + + if (ctx && !_eglIsContextLinked(ctx)) + destroy_context(dpy, ctx); + if (draw && !_eglIsSurfaceLinked(draw)) + destroy_surface(dpy, draw); + if (read && read != draw && !_eglIsSurfaceLinked(read)) /* read != draw avoids destroying the same surface twice */ + destroy_surface(dpy, read); + + return ok; +} +/** + * Swap the buffers of a window surface. This is a no-op that returns + * EGL_TRUE for pixmap and pbuffer surfaces and for surfaces that render to + * the front-left attachment. When the surface is the draw surface of the + * current context, the state tracker is notified before the swap and the + * context is re-validated afterwards unless the swap method is copy. + */ +static EGLBoolean +egl_g3d_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + _EGLContext *ctx = _eglGetCurrentContext(); + struct egl_g3d_context *gctx = NULL; /* stays NULL unless surf is the current draw surface */ + + /* no-op for pixmap or pbuffer surface */ + if (gsurf->base.Type == EGL_PIXMAP_BIT || + gsurf->base.Type == EGL_PBUFFER_BIT) + return EGL_TRUE; + + /* or when the surface is single-buffered */ + if (gsurf->render_att == NATIVE_ATTACHMENT_FRONT_LEFT) + return EGL_TRUE; + + if (ctx && ctx->DrawSurface == surf) + gctx = egl_g3d_context(ctx); + + /* flush if the surface is current */ + if (gctx) + gctx->stapi->st_notify_swapbuffers(gctx->draw.st_fb); + + /* + * We drew on the back buffer, unless there was no back buffer. + * In that case, we drew on the front buffer. Either case, we call + * swap_buffers. + */ + if (!gsurf->native->swap_buffers(gsurf->native)) + return EGL_FALSE; + + if (gctx) { + struct egl_g3d_config *gconf = egl_g3d_config(gsurf->base.Config); + + /* force validation if the swap method is not copy */ + if (gconf->native->mode.swapMethod != GLX_SWAP_COPY_OML) { + gctx->force_validate = EGL_TRUE; + egl_g3d_validate_context(dpy, &gctx->base); + } + } + + return EGL_TRUE; +} + +/** + * Find a config that supports the pixmap. 
+ */ +static _EGLConfig * +find_pixmap_config(_EGLDisplay *dpy, EGLNativePixmapType pix) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf; + EGLint i; + + for (i = 0; i < dpy->NumConfigs; i++) { /* the first supporting config wins */ + gconf = egl_g3d_config(dpy->Configs[i]); + if (gdpy->native->is_pixmap_supported(gdpy->native, pix, gconf->native)) + break; + } + + return (i < dpy->NumConfigs) ? &gconf->base : NULL; /* gconf is only read when the loop broke out early */ +} + +/** + * Get the pipe surface of the given attachment of the native surface. + * + * The native surface is validated for that single attachment, and a surface + * is requested for the returned texture with CPU write usage. NULL is + * returned when validation yields no texture for the attachment. + */ +static struct pipe_surface * +get_pipe_surface(struct native_display *ndpy, struct native_surface *nsurf, + enum native_attachment natt) +{ + struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; + struct pipe_surface *psurf; + + textures[natt] = NULL; /* only this slot is requested and read back */ + nsurf->validate(nsurf, 1 << natt, NULL, textures, NULL, NULL); + if (!textures[natt]) + return NULL; + + psurf = ndpy->screen->get_tex_surface(ndpy->screen, textures[natt], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_texture_reference(&textures[natt], NULL); /* drop the texture reference obtained from validate() */ + + return psurf; +} +/** + * Copy the surface's render surface to a native pixmap. A temporary pixmap + * surface is created for the target, using the first config that supports + * the pixmap, and is destroyed again before returning. Returns EGL_TRUE + * without doing anything when the surface has no render surface yet. + */ +static EGLBoolean +egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, + EGLNativePixmapType target) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + _EGLContext *ctx = _eglGetCurrentContext(); + struct egl_g3d_config *gconf; + struct native_surface *nsurf; + struct pipe_screen *screen = gdpy->native->screen; + struct pipe_surface *psurf; + + if (!gsurf->render_surface) + return EGL_TRUE; + + gconf = egl_g3d_config(find_pixmap_config(dpy, target)); + if (!gconf) + return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers"); + + nsurf = gdpy->native->create_pixmap_surface(gdpy->native, + target, gconf->native); + if (!nsurf) + return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers"); + + /* flush if the surface is current */ + if (ctx && ctx->DrawSurface == &gsurf->base) { + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + 
gctx->stapi->st_flush(gctx->st_ctx, + PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); + } + + psurf = get_pipe_surface(gdpy->native, nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); + if (psurf) { /* NOTE(review): when no surface is obtained the copy is skipped and EGL_TRUE is still returned */ + struct pipe_context pipe; + + /** + * XXX This is hacky: a zeroed pipe_context that only has its screen set + * is passed to util_surface_copy(). We might instead allow the + * EGLDisplay to create a pipe context of its own and use the blitter + * context for this. + */ + memset(&pipe, 0, sizeof(pipe)); + pipe.screen = screen; + + util_surface_copy(&pipe, FALSE, psurf, 0, 0, + gsurf->render_surface, 0, 0, psurf->width, psurf->height); + + pipe_surface_reference(&psurf, NULL); + nsurf->flush_frontbuffer(nsurf); + } + + nsurf->destroy(nsurf); + + return EGL_TRUE; +} +/** + * Finish all rendering of the context through its state tracker. + * NOTE(review): ctx is dereferenced without a NULL check -- presumably the + * caller only passes a current context; confirm. + */ +static EGLBoolean +egl_g3d_wait_client(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + gctx->stapi->st_finish(gctx->st_ctx); + return EGL_TRUE; +} +/** + * Wait for native rendering on the draw surface of the current context. + * Only EGL_CORE_NATIVE_ENGINE is accepted; any other engine raises + * EGL_BAD_PARAMETER. Without a current context or draw surface this is a + * no-op that returns EGL_TRUE. + */ +static EGLBoolean +egl_g3d_wait_native(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine) +{ + _EGLContext *ctx = _eglGetCurrentContext(); + + if (engine != EGL_CORE_NATIVE_ENGINE) + return _eglError(EGL_BAD_PARAMETER, "eglWaitNative"); + + if (ctx && ctx->DrawSurface) { + struct egl_g3d_surface *gsurf = egl_g3d_surface(ctx->DrawSurface); + gsurf->native->wait(gsurf->native); + } + + return EGL_TRUE; +} +/** + * Look up procname in every available state tracker, in index order, and + * return the first non-NULL result, or NULL when no state tracker knows the + * name. + */ +static _EGLProc +egl_g3d_get_proc_address(_EGLDriver *drv, const char *procname) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + _EGLProc proc; + EGLint i; + + /* in case this is called before a display is initialized */ + egl_g3d_init_st(&gdrv->base); + + for (i = 0; i < NUM_EGL_G3D_STS; i++) { + const struct egl_g3d_st *stapi = gdrv->stapis[i]; + if (stapi) { + proc = (_EGLProc) stapi->st_get_proc_address(procname); + if (proc) + return proc; + } + } + + return (_EGLProc) NULL; +} +/** + * Bind the back buffer of a pbuffer surface as a texture of the current + * OpenGL ES context. + */ +static EGLBoolean +egl_g3d_bind_tex_image(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLSurface *surf, EGLint buffer) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + _EGLContext *es1 = 
_eglGetAPIContext(EGL_OPENGL_ES_API); + struct egl_g3d_context *gctx; + enum pipe_format target_format; + int target; + + if (!gsurf || gsurf->base.Type != EGL_PBUFFER_BIT) + return _eglError(EGL_BAD_SURFACE, "eglBindTexImage"); + if (buffer != EGL_BACK_BUFFER) + return _eglError(EGL_BAD_PARAMETER, "eglBindTexImage"); + if (gsurf->base.BoundToTexture) + return _eglError(EGL_BAD_ACCESS, "eglBindTexImage"); + + switch (gsurf->base.TextureFormat) { /* map the EGL texture format to a pipe format */ + case EGL_TEXTURE_RGB: + target_format = PIPE_FORMAT_R8G8B8_UNORM; + break; + case EGL_TEXTURE_RGBA: + target_format = PIPE_FORMAT_A8R8G8B8_UNORM; + break; + default: + return _eglError(EGL_BAD_MATCH, "eglBindTexImage"); + } + + switch (gsurf->base.TextureTarget) { /* only 2D texture targets are supported */ + case EGL_TEXTURE_2D: + target = ST_TEXTURE_2D; + break; + default: + return _eglError(EGL_BAD_MATCH, "eglBindTexImage"); + } + + if (!es1) + return EGL_TRUE; /* no current OpenGL ES context: succeeds without binding, and BoundToTexture stays unset */ + if (!gsurf->render_surface) + return EGL_FALSE; /* NOTE(review): fails without recording an EGL error through _eglError() */ + + /* flush properly if the surface is bound */ + if (gsurf->base.CurrentContext) { + gctx = egl_g3d_context(gsurf->base.CurrentContext); + gctx->stapi->st_flush(gctx->st_ctx, + PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); + } + + gctx = egl_g3d_context(es1); /* the binding goes through the OpenGL ES context's state tracker */ + gctx->stapi->st_bind_texture_surface(gsurf->render_surface, + target, gsurf->base.MipmapLevel, target_format); + + gsurf->base.BoundToTexture = EGL_TRUE; + + return EGL_TRUE; +} +/** + * Release a pbuffer surface that was bound as a texture. The render + * surface is unbound through the current OpenGL ES context when there is + * one, and BoundToTexture is cleared either way. + */ +static EGLBoolean +egl_g3d_release_tex_image(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLSurface *surf, EGLint buffer) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + + if (!gsurf || gsurf->base.Type != EGL_PBUFFER_BIT || + !gsurf->base.BoundToTexture) + return _eglError(EGL_BAD_SURFACE, "eglReleaseTexImage"); + if (buffer != EGL_BACK_BUFFER) + return _eglError(EGL_BAD_PARAMETER, "eglReleaseTexImage"); + + if (gsurf->render_surface) { + _EGLContext *ctx = _eglGetAPIContext(EGL_OPENGL_ES_API); + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + + /* what if the context the surface binds to is no 
longer current? */ + if (gctx) + gctx->stapi->st_unbind_texture_surface(gsurf->render_surface, + ST_TEXTURE_2D, gsurf->base.MipmapLevel); + } + + gsurf->base.BoundToTexture = EGL_FALSE; + + return EGL_TRUE; +} + +#ifdef EGL_MESA_screen_surface +/** + * Create a scanout (screen) surface with the width and height that + * _eglInitSurface() stored in the base surface. Rendering targets the + * back-left attachment when the config is double-buffered and the + * front-left one otherwise. Returns NULL on failure. + */ +static _EGLSurface * +egl_g3d_create_screen_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_surface *gsurf; + + gsurf = CALLOC_STRUCT(egl_g3d_surface); + if (!gsurf) { + _eglError(EGL_BAD_ALLOC, "eglCreateScreenSurfaceMESA"); /* report the entry point that actually failed */ + return NULL; + } + + if (!_eglInitSurface(&gsurf->base, dpy, + EGL_SCREEN_BIT_MESA, conf, attribs)) { + free(gsurf); + return NULL; + } + + gsurf->native = + gdpy->native->modeset->create_scanout_surface(gdpy->native, + gconf->native, gsurf->base.Width, gsurf->base.Height); + if (!gsurf->native) { + free(gsurf); + return NULL; + } + + gsurf->render_att = (!gconf->native->mode.doubleBufferMode) ? 
+ NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; + + return &gsurf->base; +} +/** + * Show a screen surface on a screen using the given mode, or disable the + * screen when both surf and mode are NULL. The surface must be a screen + * surface at least as large as the mode, and the mode must belong to the + * screen. The screen's CurrentSurface and CurrentMode are updated only + * when the native display reports that the screen was programmed. + */ +static EGLBoolean +egl_g3d_show_screen_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLScreen *scr, _EGLSurface *surf, + _EGLMode *mode) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_screen *gscr = egl_g3d_screen(scr); + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + struct native_surface *nsurf; + const struct native_mode *nmode; + EGLBoolean changed; + + if (gsurf) { + EGLint idx; + + if (!mode) + return _eglError(EGL_BAD_MATCH, "eglShowSurfaceMESA"); + if (gsurf->base.Type != EGL_SCREEN_BIT_MESA) + return _eglError(EGL_BAD_SURFACE, "eglShowScreenSurfaceMESA"); + if (gsurf->base.Width < mode->Width || gsurf->base.Height < mode->Height) + return _eglError(EGL_BAD_MATCH, + "eglShowSurfaceMESA(surface smaller than mode size)"); + + /* find the index of the mode */ + for (idx = 0; idx < gscr->base.NumModes; idx++) + if (mode == &gscr->base.Modes[idx]) + break; + if (idx >= gscr->base.NumModes) { + return _eglError(EGL_BAD_MODE_MESA, + "eglShowSurfaceMESA(unknown mode)"); + } + + nsurf = gsurf->native; + nmode = gscr->native_modes[idx]; /* native_modes is indexed like base.Modes */ + } + else { + if (mode) + return _eglError(EGL_BAD_MATCH, "eglShowSurfaceMESA"); + + /* disable the screen */ + nsurf = NULL; + nmode = NULL; + } + + /* TODO surface panning by CRTC choosing */ + changed = gdpy->native->modeset->program(gdpy->native, 0, nsurf, + gscr->base.OriginX, gscr->base.OriginY, &gscr->native, 1, nmode); + if (changed) { + gscr->base.CurrentSurface = surf; /* surf rather than &gsurf->base: gsurf is NULL when the screen is being disabled */ + gscr->base.CurrentMode = mode; + } + + return changed; +} + +#endif /* EGL_MESA_screen_surface */ +/** + * Score how well this driver matches the display, from the result of the + * native probe: 0 for unknown, 40 for fallback, 50 for supported and 100 + * for an exact match. + */ +static EGLint +egl_g3d_probe(_EGLDriver *drv, _EGLDisplay *dpy) +{ + struct native_probe *nprobe; + enum native_probe_result res; + EGLint score; + + nprobe = egl_g3d_get_probe(drv, dpy); + res = native_get_probe_result(nprobe); + + switch (res) { + case NATIVE_PROBE_UNKNOWN: + default: + score = 0; + break; + case 
NATIVE_PROBE_FALLBACK: + score = 40; + break; + case NATIVE_PROBE_SUPPORTED: + score = 50; + break; + case NATIVE_PROBE_EXACT: + score = 100; + break; + } + + return score; +} +/** + * Unload the driver: destroy any cached probe object, whichever display it + * belongs to, and free the driver object. + */ +static void +egl_g3d_unload(_EGLDriver *drv) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + + egl_g3d_destroy_probe(drv, NULL); + free(gdrv); +} +/** + * Driver entry point: allocate the driver object, install the fallback + * entry points and then override the ones implemented in this file. + * Returns NULL when the driver object cannot be allocated. args is unused. + */ +_EGLDriver * +_eglMain(const char *args) +{ + static char driver_name[64]; /* static: gdrv->base.Name keeps pointing at it */ + struct egl_g3d_driver *gdrv; + + snprintf(driver_name, sizeof(driver_name), + "Gallium/%s", native_get_name()); + + gdrv = CALLOC_STRUCT(egl_g3d_driver); + if (!gdrv) + return NULL; + + _eglInitDriverFallbacks(&gdrv->base); + + gdrv->base.API.Initialize = egl_g3d_initialize; + gdrv->base.API.Terminate = egl_g3d_terminate; + gdrv->base.API.CreateContext = egl_g3d_create_context; + gdrv->base.API.DestroyContext = egl_g3d_destroy_context; + gdrv->base.API.CreateWindowSurface = egl_g3d_create_window_surface; + gdrv->base.API.CreatePixmapSurface = egl_g3d_create_pixmap_surface; + gdrv->base.API.CreatePbufferSurface = egl_g3d_create_pbuffer_surface; + gdrv->base.API.DestroySurface = egl_g3d_destroy_surface; + gdrv->base.API.MakeCurrent = egl_g3d_make_current; + gdrv->base.API.SwapBuffers = egl_g3d_swap_buffers; + gdrv->base.API.CopyBuffers = egl_g3d_copy_buffers; + gdrv->base.API.WaitClient = egl_g3d_wait_client; + gdrv->base.API.WaitNative = egl_g3d_wait_native; + gdrv->base.API.GetProcAddress = egl_g3d_get_proc_address; + + gdrv->base.API.BindTexImage = egl_g3d_bind_tex_image; + gdrv->base.API.ReleaseTexImage = egl_g3d_release_tex_image; + +#ifdef EGL_MESA_screen_surface + gdrv->base.API.CreateScreenSurfaceMESA = egl_g3d_create_screen_surface; + gdrv->base.API.ShowScreenSurfaceMESA = egl_g3d_show_screen_surface; +#endif + + gdrv->base.Name = driver_name; + gdrv->base.Probe = egl_g3d_probe; + gdrv->base.Unload = egl_g3d_unload; + + /* the key is " EGL G3D" */ + gdrv->probe_key = 0x0E61063D; /* identifies this driver's entry in the probe cache */ + + return &gdrv->base; +} diff --git 
a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h new file mode 100644 index 00000000000..5d2d9c481ab --- /dev/null +++ b/src/gallium/state_trackers/egl/common/egl_g3d.h @@ -0,0 +1,94 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _EGL_G3D_H_ +#define _EGL_G3D_H_ + +#include "pipe/p_compiler.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_format.h" +#include "egldriver.h" +#include "egldisplay.h" +#include "eglcontext.h" +#include "eglsurface.h" +#include "eglconfig.h" +#include "eglscreen.h" +#include "eglmode.h" + +#include "native.h" +#include "egl_st.h" + +struct egl_g3d_driver { + _EGLDriver base; + const struct egl_g3d_st *stapis[NUM_EGL_G3D_STS]; /* one slot per state tracker; NULL when it is not available */ + EGLint api_mask; /* OR of the api_bit of every available state tracker */ + + EGLint probe_key; /* key of this driver's entry in the probe cache */ +}; + +struct egl_g3d_display { + struct native_display *native; /* created in initialize, destroyed in terminate */ +}; + +struct egl_g3d_buffer { + struct st_framebuffer *st_fb; /* state tracker framebuffer, NULL when not allocated */ + uint attachment_mask; /* reset to 0x0 whenever the framebuffer is dropped */ +}; + +struct egl_g3d_context { + _EGLContext base; + + const struct egl_g3d_st *stapi; /* state tracker chosen from the client API */ + struct pipe_context *pipe; /* destroyed by the state tracker together with st_ctx */ + + struct st_context *st_ctx; + EGLBoolean force_validate; /* set when the buffers must be validated again */ + struct egl_g3d_buffer draw, read; /* read.st_fb may alias draw.st_fb */ +}; + +struct egl_g3d_surface { + _EGLSurface base; + struct native_surface *native; + enum native_attachment render_att; /* attachment that rendering targets */ + struct pipe_surface *render_surface; + unsigned int sequence_number; /* filled in by the native surface's validate() */ +}; + +struct egl_g3d_config { + _EGLConfig base; + const struct native_config *native; +}; + +struct egl_g3d_screen { + _EGLScreen base; + const struct native_connector *native; + const struct native_mode **native_modes; /* indexed like base.Modes; freed in terminate */ +}; + +/* standard typecasts */ +_EGL_DRIVER_STANDARD_TYPECASTS(egl_g3d) +_EGL_DRIVER_TYPECAST(egl_g3d_screen, _EGLScreen, obj) + +#endif /* _EGL_G3D_H_ */ diff --git a/src/gallium/state_trackers/egl/common/egl_st.c b/src/gallium/state_trackers/egl/common/egl_st.c new file mode 100644 index 00000000000..a88ff911cd5 --- /dev/null +++ b/src/gallium/state_trackers/egl/common/egl_st.c @@ -0,0 +1,131 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software 
without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <dlfcn.h> +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "egllog.h" +#include "EGL/egl.h" /* for EGL_api_BIT */ + +#include "egl_st.h" + +#ifndef HAVE_DLADDR +#define HAVE_DLADDR 1 +#endif + +#if HAVE_DLADDR +/* + * NULL-terminated list of the state tracker entry point names, generated by + * expanding ST_PUBLIC() over st_public_tmp.h. egl_g3d_fill_st() resolves + * the names in this order. + */ +static const char * +egl_g3d_st_names[] = { +#define ST_PUBLIC(name, ...) 
#name, +#include "st_public_tmp.h" + NULL +}; + +static boolean +egl_g3d_fill_st(struct egl_g3d_st *stapi, void *sym) +{ + st_proc *procs = (st_proc *) stapi; + void *handle; + Dl_info info; + const char **name; + + if (!dladdr(sym, &info)) + return FALSE; + handle = dlopen(info.dli_fname, RTLD_LAZY | RTLD_LOCAL | RTLD_NODELETE); + if (!handle) + return FALSE; + + for (name = egl_g3d_st_names; *name; name++) { + st_proc proc = (st_proc) dlsym(handle, *name); + if (!proc) { + _eglLog(_EGL_WARNING, "%s is missing in %s", *name, info.dli_fname); + memset(stapi, 0, sizeof(*stapi)); + dlclose(handle); + return FALSE; + } + *procs++ = proc; + } + + dlclose(handle); + return TRUE; +} + +#else /* HAVE_DLADDR */ + +static boolean +egl_g3d_fill_st(struct egl_g3d_st *stapi, void *sym) +{ +#define ST_PUBLIC(name, ...) stapi->name = name; +#include "st_public_tmp.h" + return TRUE; +} + +#endif /* HAVE_DLADDR */ + +static boolean +egl_g3d_init_st(struct egl_g3d_st *stapi, const char *api) +{ + void *handle, *sym; + boolean res = FALSE; + + /* already initialized */ + if (stapi->st_notify_swapbuffers != NULL) + return TRUE; + + handle = dlopen(NULL, RTLD_LAZY | RTLD_LOCAL); + if (!handle) + return FALSE; + + sym = dlsym(handle, api); + if (sym && egl_g3d_fill_st(stapi, sym)) + res = TRUE; + + dlclose(handle); + return res; +} + +static struct { + const char *symbol; + EGLint api_bit; +} egl_g3d_st_info[NUM_EGL_G3D_STS] = { + { "st_api_OpenGL_ES1", EGL_OPENGL_ES_BIT }, + { "st_api_OpenVG", EGL_OPENVG_BIT }, + { "st_api_OpenGL_ES2", EGL_OPENGL_ES2_BIT }, + { "st_api_OpenGL", EGL_OPENGL_BIT }, +}; + +const struct egl_g3d_st * +egl_g3d_get_st(enum egl_g3d_st_api api) +{ + static struct egl_g3d_st all_trackers[NUM_EGL_G3D_STS]; + + if (egl_g3d_init_st(&all_trackers[api], egl_g3d_st_info[api].symbol)) { + all_trackers[api].api_bit = egl_g3d_st_info[api].api_bit; + return &all_trackers[api]; + } + else { + return NULL; + } +} diff --git a/src/gallium/state_trackers/egl/common/egl_st.h 
b/src/gallium/state_trackers/egl/common/egl_st.h
new file mode 100644
index 00000000000..8fb464bd3d7
--- /dev/null
+++ b/src/gallium/state_trackers/egl/common/egl_st.h
@@ -0,0 +1,73 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.8
+ *
+ * Copyright (C) 2009-2010 Chia-I Wu <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EGL_ST_H_
+#define _EGL_ST_H_
+
+#include "GL/gl.h" /* for GL types */
+#include "GL/internal/glcore.h" /* for __GLcontextModes */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_context.h"
+
+/* avoid calling st functions directly */
+#if 1
+/*
+ * Local copies of the few constants and types needed from the state
+ * tracker, so that state_tracker/st_public.h is not included.
+ * NOTE(review): presumably these must match st_public.h -- verify.
+ */
+#define ST_SURFACE_FRONT_LEFT 0
+#define ST_SURFACE_BACK_LEFT 1
+#define ST_SURFACE_FRONT_RIGHT 2
+#define ST_SURFACE_BACK_RIGHT 3
+
+#define ST_TEXTURE_2D 0x2
+
+struct st_context;
+struct st_framebuffer;
+typedef void (*st_proc)(); /* generic state tracker function pointer */
+
+#else
+#include "state_tracker/st_public.h"
+#endif
+
+/*
+ * Remember to update the table consulted by egl_g3d_get_st() (egl_st.c)
+ * when updating this enum.
+ */
+enum egl_g3d_st_api {
+   EGL_G3D_ST_OPENGL_ES = 0,
+   EGL_G3D_ST_OPENVG,
+   EGL_G3D_ST_OPENGL_ES2,
+   EGL_G3D_ST_OPENGL,
+
+   NUM_EGL_G3D_STS /* number of state trackers; used as an array size */
+};
+/**
+ * Function table of a state tracker: one function pointer member per
+ * ST_PUBLIC entry of st_public_tmp.h, followed by extra data.
+ *
+ * egl_st.c fills the leading members by treating the struct as an array of
+ * st_proc, which is why other fields may only be added after the include.
+ */
+struct egl_g3d_st {
+#define ST_PUBLIC(name, ret, ...) ret (*name)(__VA_ARGS__);
+#include "st_public_tmp.h"
+   /* fields must be added here */
+   EGLint api_bit;
+};
+/**
+ * Return the function table of the given state tracker, or NULL if it is
+ * not available.
+ */
+const struct egl_g3d_st *
+egl_g3d_get_st(enum egl_g3d_st_api api);
+
+#endif /* _EGL_ST_H_ */
diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h
new file mode 100644
index 00000000000..4f9758545ab
--- /dev/null
+++ b/src/gallium/state_trackers/egl/common/native.h
@@ -0,0 +1,272 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.8
+ *
+ * Copyright (C) 2009-2010 Chia-I Wu <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions
of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _NATIVE_H_
+#define _NATIVE_H_
+
+#include "EGL/egl.h" /* for EGL native types */
+#include "GL/gl.h" /* for GL types needed by __GLcontextModes */
+#include "GL/internal/glcore.h" /* for __GLcontextModes */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+/**
+ * Only color buffers are listed. The others are allocated privately through,
+ * for example, st_renderbuffer_alloc_storage().
+ */
+enum native_attachment {
+   NATIVE_ATTACHMENT_FRONT_LEFT,
+   NATIVE_ATTACHMENT_BACK_LEFT,
+   NATIVE_ATTACHMENT_FRONT_RIGHT,
+   NATIVE_ATTACHMENT_BACK_RIGHT,
+
+   NUM_NATIVE_ATTACHMENTS
+};
+
+/**
+ * Enumerations for probe results.
+ */
+enum native_probe_result {
+   NATIVE_PROBE_UNKNOWN,
+   NATIVE_PROBE_FALLBACK,
+   NATIVE_PROBE_SUPPORTED,
+   NATIVE_PROBE_EXACT,
+};
+
+/**
+ * A probe object for display probe.
+ */
+struct native_probe {
+   int magic;
+   EGLNativeDisplayType display;
+   void *data;
+
+   void (*destroy)(struct native_probe *nprobe);
+};
+/**
+ * A drawable of the native display, exposed as a table of callbacks.
+ */
+struct native_surface {
+   void (*destroy)(struct native_surface *nsurf);
+
+   /**
+    * Swap the front and back buffers so that the back buffer is visible. It
+    * is a no-op if the surface is single-buffered. The contents of the back
+    * buffer after swapping may or may not be preserved.
+    */
+   boolean (*swap_buffers)(struct native_surface *nsurf);
+
+   /**
+    * Make the front buffer visible. In some native displays, changes to the
+    * front buffer might not be visible immediately and require manual flush.
+    */
+   boolean (*flush_frontbuffer)(struct native_surface *nsurf);
+
+   /**
+    * Validate the buffers of the surface. textures, if not NULL, points to an
+    * array of size NUM_NATIVE_ATTACHMENTS and the returned textures are owned
+    * by the caller. A sequence number is also returned. The caller can use
+    * it to check if anything has changed since the last call. Any of the
+    * pointers may be NULL and it indicates the caller has no interest in those
+    * values.
+    *
+    * If this function is called multiple times with different attachment
+    * masks, those not listed in the latest call might be destroyed. This
+    * behavior might change in the future.
+    */
+   boolean (*validate)(struct native_surface *nsurf, uint attachment_mask,
+                       unsigned int *seq_num, struct pipe_texture **textures,
+                       int *width, int *height);
+
+   /**
+    * Wait until all native commands affecting the surface have been executed.
+    */
+   void (*wait)(struct native_surface *nsurf);
+};
+/**
+ * A config of the native display: the GL visual description plus the pipe
+ * formats of its color, depth and stencil buffers.
+ */
+struct native_config {
+   /* __GLcontextModes should go away some day */
+   __GLcontextModes mode;
+   enum pipe_format color_format;
+   enum pipe_format depth_format;
+   enum pipe_format stencil_format;
+
+   /* treat it as an additional flag to mode.drawableType */
+   boolean scanout_bit;
+};
+/**
+ * A connector of the native display.  Only a placeholder member is declared
+ * here; NOTE(review): presumably native displays embed it in a larger
+ * struct -- confirm.
+ */
+struct native_connector {
+   int dummy;
+};
+/**
+ * A display mode: description string, size and refresh rate.
+ */
+struct native_mode {
+   const char *desc;
+   int width, height;
+   int refresh_rate;
+};
+
+struct native_display_modeset;
+
+/**
+ * A pipe winsys abstracts the OS. A pipe screen abstracts the graphics
+ * hardware. A native display consists of a pipe winsys, a pipe screen, and
+ * the native display server.
+ */
+struct native_display {
+   /**
+    * The pipe screen of the native display.
+    *
+    * Note that the "flush_frontbuffer" and "update_buffer" callbacks will be
+    * overridden.
+    */
+   struct pipe_screen *screen;
+
+   void (*destroy)(struct native_display *ndpy);
+
+   /**
+    * Get the supported configs. The configs are owned by the display, but
+    * the returned array should be free()ed.
+    *
+    * The configs will be converted to EGL config by
+    * _eglConfigFromContextModesRec and validated by _eglValidateConfig.
+    * Those failing to pass the test will be skipped.
+    */
+   const struct native_config **(*get_configs)(struct native_display *ndpy,
+                                               int *num_configs);
+
+   /**
+    * Test if a pixmap is supported by the given config. Required unless no
+    * config has GLX_PIXMAP_BIT set.
+    *
+    * This function is usually called to find a config that supports a given
+    * pixmap. Thus, it is usually called with the same pixmap in a row.
+    */
+   boolean (*is_pixmap_supported)(struct native_display *ndpy,
+                                  EGLNativePixmapType pix,
+                                  const struct native_config *nconf);
+
+
+   /**
+    * Create a window surface. Required unless no config has GLX_WINDOW_BIT
+    * set.
+    */
+   struct native_surface *(*create_window_surface)(struct native_display *ndpy,
+                                                   EGLNativeWindowType win,
+                                                   const struct native_config *nconf);
+
+   /**
+    * Create a pixmap surface. Required unless no config has GLX_PIXMAP_BIT
+    * set.
+    */
+   struct native_surface *(*create_pixmap_surface)(struct native_display *ndpy,
+                                                   EGLNativePixmapType pix,
+                                                   const struct native_config *nconf);
+
+   /**
+    * Create a pbuffer surface. Required unless no config has GLX_PBUFFER_BIT
+    * set.
+    */
+   struct native_surface *(*create_pbuffer_surface)(struct native_display *ndpy,
+                                                    const struct native_config *nconf,
+                                                    uint width, uint height);
+
+   const struct native_display_modeset *modeset;
+};
+
+/**
+ * Mode setting interface of the native display. It exposes the mode setting
+ * capabilities of the underlying graphics hardware.
+ */
+struct native_display_modeset {
+   /**
+    * Get the available physical connectors and the number of CRTCs.
+    */
+   const struct native_connector **(*get_connectors)(struct native_display *ndpy,
+                                                     int *num_connectors,
+                                                     int *num_crtcs);
+
+   /**
+    * Get the currently supported modes of a connector. The returned modes may
+    * change every time this function is called and those from previous calls
+    * might become invalid.
+    */
+   const struct native_mode **(*get_modes)(struct native_display *ndpy,
+                                           const struct native_connector *nconn,
+                                           int *num_modes);
+
+   /**
+    * Create a scan-out surface. Required unless no config has
+    * GLX_SCREEN_BIT_MESA set.
+    */
+   struct native_surface *(*create_scanout_surface)(struct native_display *ndpy,
+                                                    const struct native_config *nconf,
+                                                    uint width, uint height);
+
+   /**
+    * Program the CRTC to output the surface to the given connectors with the
+    * given mode. When surface is not given, the CRTC is disabled.
+    *
+    * This interface does not export a way to query capabilities of the CRTCs.
+    * The native display usually needs to dynamically map the index to a CRTC
+    * that supports the given connectors.
+    */
+   boolean (*program)(struct native_display *ndpy, int crtc_idx,
+                      struct native_surface *nsurf, uint x, uint y,
+                      const struct native_connector **nconns, int num_nconns,
+                      const struct native_mode *nmode);
+};
+
+/**
+ * Test whether an attachment is set in the mask.
+ */
+static INLINE boolean
+native_attachment_mask_test(uint mask, enum native_attachment att)
+{
+   return !!(mask & (1 << att));
+}
+
+/**
+ * Return a probe object for the given display.
+ *
+ * Note that the returned object may be cached and used by different native
+ * display modules. It allows fast probing when multiple modules probe the
+ * same display.
+ */
+struct native_probe *
+native_create_probe(EGLNativeDisplayType dpy);
+
+/**
+ * Probe the probe object.
+ */
+enum native_probe_result
+native_get_probe_result(struct native_probe *nprobe);
+/**
+ * NOTE(review): presumably returns the name of the native display module;
+ * the implementation is not visible from this header -- confirm.
+ */
+const char *
+native_get_name(void);
+/**
+ * Create a native display for the given EGL native display handle.
+ * NOTE(review): failure behavior (presumably a NULL return) is not visible
+ * from this header -- confirm.
+ */
+struct native_display *
+native_create_display(EGLNativeDisplayType dpy);
+
+#endif /* _NATIVE_H_ */
diff --git a/src/gallium/state_trackers/egl/common/st_public_tmp.h b/src/gallium/state_trackers/egl/common/st_public_tmp.h
new file mode 100644
index 00000000000..507a0ec4027
--- /dev/null
+++ b/src/gallium/state_trackers/egl/common/st_public_tmp.h
@@ -0,0 +1,20 @@
+ST_PUBLIC(st_create_context, struct st_context *, struct pipe_context *pipe, const __GLcontextModes *visual, struct st_context *share)
+ST_PUBLIC(st_destroy_context, void, struct st_context *st)
+ST_PUBLIC(st_copy_context_state, void, struct st_context *dst, struct st_context *src, uint mask)
+ST_PUBLIC(st_create_framebuffer, struct st_framebuffer *, const __GLcontextModes *visual, enum pipe_format colorFormat, enum pipe_format depthFormat, enum pipe_format stencilFormat, uint width, uint height, void *privateData)
+ST_PUBLIC(st_resize_framebuffer, void, struct st_framebuffer *stfb, uint width, uint height)
+ST_PUBLIC(st_set_framebuffer_surface, void, struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface *surf)
+ST_PUBLIC(st_get_framebuffer_dimensions, void, struct st_framebuffer *stfb, uint *width, uint *height)
+ST_PUBLIC(st_get_framebuffer_surface, int, struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface **surface)
+ST_PUBLIC(st_get_framebuffer_texture, int, struct st_framebuffer *stfb, uint surfIndex, struct pipe_texture **texture)
+ST_PUBLIC(st_framebuffer_private, void *, struct st_framebuffer *stfb)
+ST_PUBLIC(st_unreference_framebuffer, void, struct st_framebuffer *stfb)
+ST_PUBLIC(st_make_current, GLboolean, struct st_context *st, struct st_framebuffer *draw, struct st_framebuffer *read)
+ST_PUBLIC(st_get_current, struct st_context *, void)
+ST_PUBLIC(st_flush, void, struct st_context *st, uint pipeFlushFlags, struct pipe_fence_handle **fence)
+ST_PUBLIC(st_finish, void, struct st_context *st) +ST_PUBLIC(st_notify_swapbuffers, void, struct st_framebuffer *stfb) +ST_PUBLIC(st_bind_texture_surface, int, struct pipe_surface *ps, int target, int level, enum pipe_format format) +ST_PUBLIC(st_unbind_texture_surface, int, struct pipe_surface *ps, int target, int level) +ST_PUBLIC(st_get_proc_address, st_proc, const char *procname) +#undef ST_PUBLIC diff --git a/src/gallium/state_trackers/egl/egl_context.c b/src/gallium/state_trackers/egl/egl_context.c deleted file mode 100644 index fee186c6010..00000000000 --- a/src/gallium/state_trackers/egl/egl_context.c +++ /dev/null @@ -1,105 +0,0 @@ - -#include "utils.h" -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include "egl_tracker.h" - -#include "egllog.h" - - -#include "pipe/p_context.h" -#include "pipe/p_screen.h" - -#include "state_tracker/st_public.h" -#include "state_tracker/drm_api.h" - -#include "GL/internal/glcore.h" - -_EGLContext * -drm_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, _EGLContext *share_list, const EGLint *attrib_list) -{ - struct drm_device *dev = lookup_drm_device(dpy); - struct drm_context *ctx; - struct drm_context *share = NULL; - struct st_context *st_share = NULL; - int i; - __GLcontextModes *visual; - - for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { - switch (attrib_list[i]) { - /* no attribs defined for now */ - default: - _eglError(EGL_BAD_ATTRIBUTE, "eglCreateContext"); - return EGL_NO_CONTEXT; - } - } - - ctx = (struct drm_context *) calloc(1, sizeof(struct drm_context)); - if (!ctx) - goto err_c; - - _eglInitContext(drv, &ctx->base, conf, attrib_list); - - ctx->pipe = dev->api->create_context(dev->api, dev->screen); - if (!ctx->pipe) - goto err_pipe; - - if (share) - st_share = share->st; - - visual = drm_visual_from_config(conf); - ctx->st = st_create_context(ctx->pipe, visual, st_share); - drm_visual_modes_destroy(visual); - - if (!ctx->st) - goto err_gl; - - return 
&ctx->base; - -err_gl: - ctx->pipe->destroy(ctx->pipe); -err_pipe: - free(ctx); -err_c: - return NULL; -} - -EGLBoolean -drm_destroy_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *context) -{ - struct drm_context *c = lookup_drm_context(context); - if (!_eglIsContextBound(&c->base)) { - st_destroy_context(c->st); - free(c); - } - return EGL_TRUE; -} - -EGLBoolean -drm_make_current(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw, _EGLSurface *read, _EGLContext *context) -{ - struct drm_surface *readSurf = lookup_drm_surface(read); - struct drm_surface *drawSurf = lookup_drm_surface(draw); - struct drm_context *ctx = lookup_drm_context(context); - EGLBoolean b; - - b = _eglMakeCurrent(drv, dpy, draw, read, context); - if (!b) - return EGL_FALSE; - - if (ctx) { - if (!drawSurf || !readSurf) - return EGL_FALSE; - - st_make_current(ctx->st, drawSurf->stfb, readSurf->stfb); - - /* st_resize_framebuffer needs a bound context to work */ - st_resize_framebuffer(drawSurf->stfb, drawSurf->w, drawSurf->h); - st_resize_framebuffer(readSurf->stfb, readSurf->w, readSurf->h); - } else { - st_make_current(NULL, NULL, NULL); - } - - return EGL_TRUE; -} diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c deleted file mode 100644 index d55aa51b82d..00000000000 --- a/src/gallium/state_trackers/egl/egl_surface.c +++ /dev/null @@ -1,443 +0,0 @@ - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include "egl_tracker.h" - -#include "egllog.h" - -#include "pipe/p_inlines.h" -#include "pipe/p_screen.h" -#include "pipe/p_context.h" - -#include "state_tracker/drm_api.h" - -#include "util/u_format.h" -#include "util/u_rect.h" - -/* - * Util functions - */ - -static drmModeModeInfoPtr -drm_find_mode(drmModeConnectorPtr connector, _EGLMode *mode) -{ - int i; - drmModeModeInfoPtr m = NULL; - - for (i = 0; i < connector->count_modes; i++) { - m = &connector->modes[i]; - if (m->hdisplay == mode->Width && m->vdisplay == 
mode->Height && m->vrefresh == mode->RefreshRate) - break; - m = &connector->modes[0]; /* if we can't find one, return first */ - } - - return m; -} - -static struct st_framebuffer * -drm_create_framebuffer(struct pipe_screen *screen, - const __GLcontextModes *visual, - unsigned width, - unsigned height, - void *priv) -{ - enum pipe_format color_format, depth_stencil_format; - boolean d_depth_bits_last; - boolean ds_depth_bits_last; - - d_depth_bits_last = - screen->is_format_supported(screen, PIPE_FORMAT_X8Z24_UNORM, - PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0); - ds_depth_bits_last = - screen->is_format_supported(screen, PIPE_FORMAT_S8Z24_UNORM, - PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0); - - if (visual->redBits == 8) { - if (visual->alphaBits == 8) - color_format = PIPE_FORMAT_A8R8G8B8_UNORM; - else - color_format = PIPE_FORMAT_X8R8G8B8_UNORM; - } else { - color_format = PIPE_FORMAT_R5G6B5_UNORM; - } - - switch(visual->depthBits) { - default: - case 0: - depth_stencil_format = PIPE_FORMAT_NONE; - break; - case 16: - depth_stencil_format = PIPE_FORMAT_Z16_UNORM; - break; - case 24: - if (visual->stencilBits == 0) { - depth_stencil_format = (d_depth_bits_last) ? - PIPE_FORMAT_X8Z24_UNORM: - PIPE_FORMAT_Z24X8_UNORM; - } else { - depth_stencil_format = (ds_depth_bits_last) ? 
- PIPE_FORMAT_S8Z24_UNORM: - PIPE_FORMAT_Z24S8_UNORM; - } - break; - case 32: - depth_stencil_format = PIPE_FORMAT_Z32_UNORM; - break; - } - - return st_create_framebuffer(visual, - color_format, - depth_stencil_format, - depth_stencil_format, - width, - height, - priv); -} - -static void -drm_create_texture(_EGLDisplay *dpy, - struct drm_screen *scrn, - unsigned w, unsigned h) -{ - struct drm_device *dev = lookup_drm_device(dpy); - struct pipe_screen *screen = dev->screen; - struct pipe_surface *surface; - struct pipe_texture *texture; - struct pipe_texture templat; - struct pipe_buffer *buf = NULL; - unsigned pitch = 0; - - memset(&templat, 0, sizeof(templat)); - templat.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; - templat.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY; - templat.target = PIPE_TEXTURE_2D; - templat.last_level = 0; - templat.depth0 = 1; - templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; - templat.width0 = w; - templat.height0 = h; - - texture = screen->texture_create(dev->screen, - &templat); - - if (!texture) - goto err_tex; - - surface = screen->get_tex_surface(screen, - texture, - 0, - 0, - 0, - PIPE_BUFFER_USAGE_GPU_WRITE); - - if (!surface) - goto err_surf; - - scrn->tex = texture; - scrn->surface = surface; - scrn->front.width = w; - scrn->front.height = h; - scrn->front.pitch = pitch; - dev->api->local_handle_from_texture(dev->api, screen, texture, - &scrn->front.pitch, &scrn->front.handle); - if (0) - goto err_handle; - - return; - -err_handle: - pipe_surface_reference(&surface, NULL); -err_surf: - pipe_texture_reference(&texture, NULL); -err_tex: - pipe_buffer_reference(&buf, NULL); - return; -} - -/* - * Exported functions - */ - -void -drm_takedown_shown_screen(_EGLDisplay *dpy, struct drm_screen *screen) -{ - struct drm_device *dev = lookup_drm_device(dpy); - - screen->surf = NULL; - - drmModeSetCrtc( - dev->drmFD, - screen->crtcID, - 0, /* FD */ - 0, 0, - NULL, 0, /* List of output ids */ - NULL); - - drmModeRmFB(dev->drmFD, screen->fbID); - 
drmModeFreeFB(screen->fb); - screen->fb = NULL; - - pipe_surface_reference(&screen->surface, NULL); - pipe_texture_reference(&screen->tex, NULL); - - screen->shown = 0; -} - -/** - * Called by libEGL's eglCreateWindowSurface(). - */ -_EGLSurface * -drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, NativeWindowType window, const EGLint *attrib_list) -{ - return NULL; -} - - -/** - * Called by libEGL's eglCreatePixmapSurface(). - */ -_EGLSurface * -drm_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, NativePixmapType pixmap, const EGLint *attrib_list) -{ - return NULL; -} - - -/** - * Called by libEGL's eglCreatePbufferSurface(). - */ -_EGLSurface * -drm_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, - const EGLint *attrib_list) -{ - struct drm_device *dev = lookup_drm_device(dpy); - int i; - int width = -1; - int height = -1; - struct drm_surface *surf = NULL; - __GLcontextModes *visual; - - for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { - switch (attrib_list[i]) { - case EGL_WIDTH: - width = attrib_list[++i]; - break; - case EGL_HEIGHT: - height = attrib_list[++i]; - break; - default: - _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface"); - return EGL_NO_SURFACE; - } - } - - if (width < 1 || height < 1) { - _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface"); - return NULL; - } - - surf = (struct drm_surface *) calloc(1, sizeof(struct drm_surface)); - if (!surf) - goto err; - - if (!_eglInitSurface(drv, &surf->base, EGL_PBUFFER_BIT, conf, attrib_list)) - goto err_surf; - - surf->w = width; - surf->h = height; - - visual = drm_visual_from_config(conf); - surf->stfb = drm_create_framebuffer(dev->screen, visual, - width, height, - (void*)surf); - drm_visual_modes_destroy(visual); - - return &surf->base; - -err_surf: - free(surf); -err: - return NULL; -} - -/** - * Called by libEGL's eglCreateScreenSurfaceMESA(). 
- */ -_EGLSurface * -drm_create_screen_surface_mesa(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *cfg, - const EGLint *attrib_list) -{ - EGLSurface surf = drm_create_pbuffer_surface(drv, dpy, cfg, attrib_list); - - return surf; -} - -/** - * Called by libEGL's eglShowScreenSurfaceMESA(). - */ -EGLBoolean -drm_show_screen_surface_mesa(_EGLDriver *drv, _EGLDisplay *dpy, - _EGLScreen *screen, - _EGLSurface *surface, _EGLMode *mode) -{ - struct drm_device *dev = lookup_drm_device(dpy); - struct drm_surface *surf = lookup_drm_surface(surface); - struct drm_screen *scrn = lookup_drm_screen(screen); - int ret; - unsigned int i, k; - - if (scrn->shown) - drm_takedown_shown_screen(dpy, scrn); - - - drm_create_texture(dpy, scrn, mode->Width, mode->Height); - if (!scrn->tex) - goto err_tex; - - ret = drmModeAddFB(dev->drmFD, - scrn->front.width, scrn->front.height, - 32, 32, scrn->front.pitch, - scrn->front.handle, - &scrn->fbID); - - if (ret) - goto err_bo; - - scrn->fb = drmModeGetFB(dev->drmFD, scrn->fbID); - if (!scrn->fb) - goto err_bo; - - /* find a fitting crtc */ - { - drmModeConnector *con = scrn->connector; - - scrn->mode = drm_find_mode(con, mode); - if (!scrn->mode) - goto err_fb; - - for (k = 0; k < con->count_encoders; k++) { - drmModeEncoder *enc = drmModeGetEncoder(dev->drmFD, con->encoders[k]); - for (i = 0; i < dev->res->count_crtcs; i++) { - if (enc->possible_crtcs & (1<<i)) { - /* save the ID */ - scrn->crtcID = dev->res->crtcs[i]; - - /* skip the rest */ - i = dev->res->count_crtcs; - k = dev->res->count_encoders; - } - } - drmModeFreeEncoder(enc); - } - } - - ret = drmModeSetCrtc(dev->drmFD, - scrn->crtcID, - scrn->fbID, - 0, 0, - &scrn->connectorID, 1, - scrn->mode); - - if (ret) - goto err_crtc; - - - if (scrn->dpms) - drmModeConnectorSetProperty(dev->drmFD, - scrn->connectorID, - scrn->dpms->prop_id, - DRM_MODE_DPMS_ON); - - surf->screen = scrn; - - scrn->surf = surf; - scrn->shown = 1; - - return EGL_TRUE; - -err_crtc: - scrn->crtcID = 0; - -err_fb: 
- drmModeRmFB(dev->drmFD, scrn->fbID); - drmModeFreeFB(scrn->fb); - scrn->fb = NULL; - -err_bo: - pipe_surface_reference(&scrn->surface, NULL); - pipe_texture_reference(&scrn->tex, NULL); - -err_tex: - return EGL_FALSE; -} - -/** - * Called by libEGL's eglDestroySurface(). - */ -EGLBoolean -drm_destroy_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface) -{ - struct drm_surface *surf = lookup_drm_surface(surface); - if (!_eglIsSurfaceBound(&surf->base)) { - if (surf->screen) - drm_takedown_shown_screen(dpy, surf->screen); - st_unreference_framebuffer(surf->stfb); - free(surf); - } - return EGL_TRUE; -} - -/** - * Called by libEGL's eglSwapBuffers(). - */ -EGLBoolean -drm_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw) -{ - struct drm_device *dev = lookup_drm_device(dpy); - struct drm_surface *surf = lookup_drm_surface(draw); - struct pipe_surface *back_surf; - - if (!surf) - return EGL_FALSE; - - st_get_framebuffer_surface(surf->stfb, ST_SURFACE_BACK_LEFT, &back_surf); - - if (back_surf) { - struct drm_context *ctx = lookup_drm_context(draw->Binding); - - st_notify_swapbuffers(surf->stfb); - - if (ctx && surf->screen) { - if (ctx->pipe->surface_copy) { - ctx->pipe->surface_copy(ctx->pipe, - surf->screen->surface, - 0, 0, - back_surf, - 0, 0, - surf->w, surf->h); - } else { - util_surface_copy(ctx->pipe, FALSE, - surf->screen->surface, - 0, 0, - back_surf, - 0, 0, - surf->w, surf->h); - } - ctx->pipe->flush(ctx->pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE, NULL); - -#ifdef DRM_MODE_FEATURE_DIRTYFB - /* TODO query connector property to see if this is needed */ - drmModeDirtyFB(dev->drmFD, surf->screen->fbID, NULL, 0); -#else - (void)dev; -#endif - - /* TODO more stuff here */ - } - } - - return EGL_TRUE; -} diff --git a/src/gallium/state_trackers/egl/egl_tracker.c b/src/gallium/state_trackers/egl/egl_tracker.c deleted file mode 100644 index 9345b0f4908..00000000000 --- a/src/gallium/state_trackers/egl/egl_tracker.c +++ 
/dev/null @@ -1,274 +0,0 @@ - -#include "utils.h" - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include "egl_tracker.h" - -#include <fcntl.h> - -#include "egllog.h" -#include "state_tracker/drm_api.h" - -#include "pipe/p_screen.h" -#include "pipe/internal/p_winsys_screen.h" - -/** HACK */ -void* driDriverAPI; - - -/* - * Exported functions - */ - -/** Called by libEGL just prior to unloading/closing the driver. - */ -static void -drm_unload(_EGLDriver *drv) -{ - free(drv); -} - -/** - * The bootstrap function. Return a new drm_driver object and - * plug in API functions. - * libEGL finds this function with dlopen()/dlsym() and calls it from - * "load driver" function. - */ -_EGLDriver * -_eglMain(const char *args) -{ - _EGLDriver *drv; - - drv = (_EGLDriver *) calloc(1, sizeof(_EGLDriver)); - if (!drv) { - return NULL; - } - - /* First fill in the dispatch table with defaults */ - _eglInitDriverFallbacks(drv); - /* then plug in our Drm-specific functions */ - drv->API.Initialize = drm_initialize; - drv->API.Terminate = drm_terminate; - drv->API.CreateContext = drm_create_context; - drv->API.MakeCurrent = drm_make_current; - drv->API.CreateWindowSurface = drm_create_window_surface; - drv->API.CreatePixmapSurface = drm_create_pixmap_surface; - drv->API.CreatePbufferSurface = drm_create_pbuffer_surface; - drv->API.DestroySurface = drm_destroy_surface; - drv->API.DestroyContext = drm_destroy_context; - drv->API.CreateScreenSurfaceMESA = drm_create_screen_surface_mesa; - drv->API.ShowScreenSurfaceMESA = drm_show_screen_surface_mesa; - drv->API.SwapBuffers = drm_swap_buffers; - - drv->Name = "DRM/Gallium/Win"; - drv->Unload = drm_unload; - - return drv; -} - -static void -drm_get_device_id(struct drm_device *device) -{ - char path[512]; - FILE *file; - char *ret; - - /* TODO get the real minor */ - int minor = 0; - - device->deviceID = 0; - - snprintf(path, sizeof(path), "/sys/class/drm/card%d/device/device", minor); - file = fopen(path, "r"); - if 
(!file) { - _eglLog(_EGL_WARNING, "Could not retrive device ID\n"); - return; - } - - ret = fgets(path, sizeof( path ), file); - fclose(file); - if (!ret) - return; - - sscanf(path, "%x", &device->deviceID); -} - -static void -drm_update_res(struct drm_device *dev) -{ - drmModeFreeResources(dev->res); - dev->res = drmModeGetResources(dev->drmFD); -} - -static void -drm_add_modes_from_connector(_EGLScreen *screen, drmModeConnectorPtr connector) -{ - drmModeModeInfoPtr m = NULL; - int i; - - for (i = 0; i < connector->count_modes; i++) { - m = &connector->modes[i]; - _eglAddNewMode(screen, m->hdisplay, m->vdisplay, m->vrefresh, m->name); - } -} - -static void -drm_find_dpms(struct drm_device *dev, struct drm_screen *screen) -{ - drmModeConnectorPtr c = screen->connector; - drmModePropertyPtr p; - int i; - - for (i = 0; i < c->count_props; i++) { - p = drmModeGetProperty(dev->drmFD, c->props[i]); - if (!strcmp(p->name, "DPMS")) - break; - - drmModeFreeProperty(p); - p = NULL; - } - - screen->dpms = p; -} - -static int drm_open_minor(int minor) -{ - char buf[64]; - - sprintf(buf, DRM_DEV_NAME, DRM_DIR_NAME, minor); - return open(buf, O_RDWR, 0); -} - -EGLBoolean -drm_initialize(_EGLDriver *drv, _EGLDisplay *disp, EGLint *major, EGLint *minor) -{ - struct drm_device *dev; - struct drm_screen *screen = NULL; - drmModeConnectorPtr connector = NULL; - drmModeResPtr res = NULL; - unsigned count_connectors = 0; - int num_screens = 0; - EGLint i; - int fd; - _EGLConfig *config; - - dev = (struct drm_device *) calloc(1, sizeof(struct drm_device)); - if (!dev) - return EGL_FALSE; - dev->api = drm_api_create(); - - /* try the first node */ - fd = drm_open_minor(0); - if (fd < 0) - goto err_fd; - - dev->drmFD = fd; - drm_get_device_id(dev); - - dev->screen = dev->api->create_screen(dev->api, dev->drmFD, NULL); - if (!dev->screen) - goto err_screen; - dev->winsys = dev->screen->winsys; - - driInitExtensions(NULL, NULL, GL_FALSE); - - drm_update_res(dev); - res = dev->res; - if 
(res) - count_connectors = res->count_connectors; - else - _eglLog(_EGL_WARNING, "Could not retrive kms information\n"); - - for(i = 0; i < count_connectors && i < MAX_SCREENS; i++) { - connector = drmModeGetConnector(fd, res->connectors[i]); - - if (!connector) - continue; - - if (connector->connection != DRM_MODE_CONNECTED) { - drmModeFreeConnector(connector); - continue; - } - - screen = malloc(sizeof(struct drm_screen)); - memset(screen, 0, sizeof(*screen)); - screen->connector = connector; - screen->connectorID = connector->connector_id; - _eglInitScreen(&screen->base); - _eglAddScreen(disp, &screen->base); - drm_add_modes_from_connector(&screen->base, connector); - drm_find_dpms(dev, screen); - dev->screens[num_screens++] = screen; - } - dev->count_screens = num_screens; - - disp->DriverData = dev; - - /* for now we only have one config */ - config = calloc(1, sizeof(*config)); - memset(config, 1, sizeof(*config)); - _eglInitConfig(config, 1); - _eglSetConfigAttrib(config, EGL_RED_SIZE, 8); - _eglSetConfigAttrib(config, EGL_GREEN_SIZE, 8); - _eglSetConfigAttrib(config, EGL_BLUE_SIZE, 8); - _eglSetConfigAttrib(config, EGL_ALPHA_SIZE, 8); - _eglSetConfigAttrib(config, EGL_BUFFER_SIZE, 32); - _eglSetConfigAttrib(config, EGL_DEPTH_SIZE, 24); - _eglSetConfigAttrib(config, EGL_STENCIL_SIZE, 8); - _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, EGL_PBUFFER_BIT); - _eglAddConfig(disp, config); - - disp->ClientAPIsMask = EGL_OPENGL_BIT /*| EGL_OPENGL_ES_BIT*/; - /* enable supported extensions */ - disp->Extensions.MESA_screen_surface = EGL_TRUE; - disp->Extensions.MESA_copy_context = EGL_TRUE; - - *major = 1; - *minor = 4; - - return EGL_TRUE; - -err_screen: - drmClose(fd); -err_fd: - free(dev); - return EGL_FALSE; -} - -EGLBoolean -drm_terminate(_EGLDriver *drv, _EGLDisplay *dpy) -{ - struct drm_device *dev = lookup_drm_device(dpy); - struct drm_screen *screen; - int i = 0; - - _eglReleaseDisplayResources(drv, dpy); - _eglCleanupDisplay(dpy); - - 
drmFreeVersion(dev->version); - - for (i = 0; i < dev->count_screens; i++) { - screen = dev->screens[i]; - - if (screen->shown) - drm_takedown_shown_screen(dpy, screen); - - drmModeFreeProperty(screen->dpms); - drmModeFreeConnector(screen->connector); - _eglDestroyScreen(&screen->base); - dev->screens[i] = NULL; - } - - dev->screen->destroy(dev->screen); - dev->winsys = NULL; - - drmClose(dev->drmFD); - - dev->api->destroy(dev->api); - free(dev); - dpy->DriverData = NULL; - - return EGL_TRUE; -} diff --git a/src/gallium/state_trackers/egl/egl_tracker.h b/src/gallium/state_trackers/egl/egl_tracker.h deleted file mode 100644 index 73eb1a1226e..00000000000 --- a/src/gallium/state_trackers/egl/egl_tracker.h +++ /dev/null @@ -1,195 +0,0 @@ - -#ifndef _EGL_TRACKER_H_ -#define _EGL_TRACKER_H_ - -#include <stdint.h> - -#include "eglconfig.h" -#include "eglcontext.h" -#include "egldisplay.h" -#include "egldriver.h" -#include "eglglobals.h" -#include "eglmode.h" -#include "eglscreen.h" -#include "eglsurface.h" - -#include "xf86drm.h" -#include "xf86drmMode.h" - -#include "pipe/p_compiler.h" - -#include "state_tracker/st_public.h" - -#define MAX_SCREENS 16 - -struct pipe_winsys; -struct pipe_screen; -struct pipe_context; -struct state_tracker; - -struct drm_screen; -struct drm_context; - -struct drm_device -{ - /* - * pipe - */ - - struct drm_api *api; - struct pipe_winsys *winsys; - struct pipe_screen *screen; - - /* - * drm - */ - - int drmFD; - drmVersionPtr version; - int deviceID; - - drmModeResPtr res; - - struct drm_screen *screens[MAX_SCREENS]; - size_t count_screens; -}; - -struct drm_surface -{ - _EGLSurface base; /* base class/object */ - - /* - * pipe - */ - - - struct st_framebuffer *stfb; - - /* - * drm - */ - - struct drm_screen *screen; - - int w; - int h; -}; - -struct drm_context -{ - _EGLContext base; /* base class/object */ - - /* pipe */ - - struct pipe_context *pipe; - struct st_context *st; -}; - -struct drm_screen -{ - _EGLScreen base; - - /* - * pipe 
- */ - - struct pipe_texture *tex; - struct pipe_surface *surface; - - /* - * drm - */ - - struct { - unsigned height; - unsigned width; - unsigned pitch; - unsigned handle; - } front; - - /* currently only support one connector */ - drmModeConnectorPtr connector; - uint32_t connectorID; - - /* dpms property */ - drmModePropertyPtr dpms; - - /* Has this screen been shown */ - int shown; - - /* Surface that is currently attached to this screen */ - struct drm_surface *surf; - - /* framebuffer */ - drmModeFBPtr fb; - uint32_t fbID; - - /* crtc and mode used */ - /*drmModeCrtcPtr crtc;*/ - uint32_t crtcID; - - drmModeModeInfoPtr mode; -}; - - -static INLINE struct drm_device * -lookup_drm_device(_EGLDisplay *d) -{ - return (struct drm_device *) d->DriverData; -} - - -static INLINE struct drm_context * -lookup_drm_context(_EGLContext *c) -{ - return (struct drm_context *) c; -} - - -static INLINE struct drm_surface * -lookup_drm_surface(_EGLSurface *s) -{ - return (struct drm_surface *) s; -} - -static INLINE struct drm_screen * -lookup_drm_screen(_EGLScreen *s) -{ - return (struct drm_screen *) s; -} - -/** - * egl_visual.h - */ -/*@{*/ -void drm_visual_modes_destroy(__GLcontextModes *modes); -__GLcontextModes* drm_visual_modes_create(unsigned count, size_t minimum_size); -__GLcontextModes* drm_visual_from_config(_EGLConfig *conf); -/*@}*/ - -/** - * egl_surface.h - */ -/*@{*/ -void drm_takedown_shown_screen(_EGLDisplay *dpy, struct drm_screen *screen); -/*@}*/ - -/** - * All function exported to the egl side. 
- */ -/*@{*/ -EGLBoolean drm_initialize(_EGLDriver *drv, _EGLDisplay *dpy, EGLint *major, EGLint *minor); -EGLBoolean drm_terminate(_EGLDriver *drv, _EGLDisplay *dpy); -_EGLContext *drm_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, _EGLContext *share_list, const EGLint *attrib_list); -EGLBoolean drm_destroy_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *context); -_EGLSurface *drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, NativeWindowType window, const EGLint *attrib_list); -_EGLSurface *drm_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, NativePixmapType pixmap, const EGLint *attrib_list); -_EGLSurface *drm_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, const EGLint *attrib_list); -_EGLSurface *drm_create_screen_surface_mesa(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, const EGLint *attrib_list); -EGLBoolean drm_show_screen_surface_mesa(_EGLDriver *drv, _EGLDisplay *dpy, _EGLScreen *screen, _EGLSurface *surface, _EGLMode *mode); -EGLBoolean drm_destroy_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface); -EGLBoolean drm_make_current(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw, _EGLSurface *read, _EGLContext *context); -EGLBoolean drm_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw); -/*@}*/ - -#endif diff --git a/src/gallium/state_trackers/egl/egl_visual.c b/src/gallium/state_trackers/egl/egl_visual.c deleted file mode 100644 index e59f893851e..00000000000 --- a/src/gallium/state_trackers/egl/egl_visual.c +++ /dev/null @@ -1,85 +0,0 @@ - -#include "egl_tracker.h" - -#include "egllog.h" - -void -drm_visual_modes_destroy(__GLcontextModes *modes) -{ - _eglLog(_EGL_DEBUG, "%s", __FUNCTION__); - - while (modes) { - __GLcontextModes * const next = modes->next; - free(modes); - modes = next; - } -} - -__GLcontextModes * -drm_visual_modes_create(unsigned count, size_t minimum_size) -{ - /* This code copied 
from libGLX, and modified */ - const size_t size = (minimum_size > sizeof(__GLcontextModes)) - ? minimum_size : sizeof(__GLcontextModes); - __GLcontextModes * head = NULL; - __GLcontextModes ** next; - unsigned i; - - _eglLog(_EGL_DEBUG, "%s %d %d", __FUNCTION__, count, minimum_size); - - next = & head; - for (i = 0 ; i < count ; i++) { - *next = (__GLcontextModes *) calloc(1, size); - if (*next == NULL) { - drm_visual_modes_destroy(head); - head = NULL; - break; - } - - (*next)->doubleBufferMode = 1; - (*next)->visualID = GLX_DONT_CARE; - (*next)->visualType = GLX_DONT_CARE; - (*next)->visualRating = GLX_NONE; - (*next)->transparentPixel = GLX_NONE; - (*next)->transparentRed = GLX_DONT_CARE; - (*next)->transparentGreen = GLX_DONT_CARE; - (*next)->transparentBlue = GLX_DONT_CARE; - (*next)->transparentAlpha = GLX_DONT_CARE; - (*next)->transparentIndex = GLX_DONT_CARE; - (*next)->xRenderable = GLX_DONT_CARE; - (*next)->fbconfigID = GLX_DONT_CARE; - (*next)->swapMethod = GLX_SWAP_UNDEFINED_OML; - (*next)->bindToTextureRgb = GLX_DONT_CARE; - (*next)->bindToTextureRgba = GLX_DONT_CARE; - (*next)->bindToMipmapTexture = GLX_DONT_CARE; - (*next)->bindToTextureTargets = 0; - (*next)->yInverted = GLX_DONT_CARE; - - next = & ((*next)->next); - } - - return head; -} - -__GLcontextModes * -drm_visual_from_config(_EGLConfig *conf) -{ - __GLcontextModes *visual; - (void)conf; - - visual = drm_visual_modes_create(1, sizeof(*visual)); - visual->redBits = 8; - visual->greenBits = 8; - visual->blueBits = 8; - visual->alphaBits = 8; - - visual->rgbBits = 32; - visual->doubleBufferMode = 1; - - visual->depthBits = 24; - visual->haveDepthBuffer = visual->depthBits > 0; - visual->stencilBits = 8; - visual->haveStencilBuffer = visual->stencilBits > 0; - - return visual; -} diff --git a/src/gallium/state_trackers/egl/kms/native_kms.c b/src/gallium/state_trackers/egl/kms/native_kms.c new file mode 100644 index 00000000000..91cefc538d1 --- /dev/null +++ 
b/src/gallium/state_trackers/egl/kms/native_kms.c @@ -0,0 +1,856 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <stdio.h> +#include <string.h> + +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "egllog.h" + +#include "native_kms.h" + +static boolean +kms_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, + int *width, int *height) +{ + struct kms_surface *ksurf = kms_surface(nsurf); + struct kms_display *kdpy = ksurf->kdpy; + struct pipe_screen *screen = kdpy->base.screen; + struct pipe_texture templ, *ptex; + int att; + + if (attachment_mask) { + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.last_level = 0; + templ.width0 = ksurf->width; + templ.height0 = ksurf->height; + templ.depth0 = 1; + templ.format = ksurf->color_format; + templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; + if (ksurf->type == KMS_SURFACE_TYPE_SCANOUT) + templ.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY; + } + + /* create textures */ + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + /* delay the allocation */ + if (!native_attachment_mask_test(attachment_mask, att)) + continue; + + ptex = ksurf->textures[att]; + if (!ptex) { + ptex = screen->texture_create(screen, &templ); + ksurf->textures[att] = ptex; + } + + if (textures) { + textures[att] = NULL; + pipe_texture_reference(&textures[att], ptex); + } + } + + if (seq_num) + *seq_num = ksurf->sequence_number; + if (width) + *width = ksurf->width; + if (height) + *height = ksurf->height; + + return TRUE; +} + +/** + * Add textures as DRM framebuffers. + */ +static boolean +kms_surface_init_framebuffers(struct native_surface *nsurf, boolean need_back) +{ + struct kms_surface *ksurf = kms_surface(nsurf); + struct kms_display *kdpy = ksurf->kdpy; + int num_framebuffers = (need_back) ? 
2 : 1; + int i, err; + + for (i = 0; i < num_framebuffers; i++) { + struct kms_framebuffer *fb; + enum native_attachment natt; + unsigned int handle, stride; + uint block_bits; + + if (i == 0) { + fb = &ksurf->front_fb; + natt = NATIVE_ATTACHMENT_FRONT_LEFT; + } + else { + fb = &ksurf->back_fb; + natt = NATIVE_ATTACHMENT_BACK_LEFT; + } + + if (!fb->texture) { + /* make sure the texture has been allocated */ + kms_surface_validate(&ksurf->base, 1 << natt, NULL, NULL, NULL, NULL); + if (!ksurf->textures[natt]) + return FALSE; + + pipe_texture_reference(&fb->texture, ksurf->textures[natt]); + } + + /* already initialized */ + if (fb->buffer_id) + continue; + + /* TODO detect the real value */ + fb->is_passive = TRUE; + + if (!kdpy->api->local_handle_from_texture(kdpy->api, + kdpy->base.screen, fb->texture, &stride, &handle)) + return FALSE; + + block_bits = util_format_get_blocksizebits(ksurf->color_format); + err = drmModeAddFB(kdpy->fd, ksurf->width, ksurf->height, + block_bits, block_bits, stride, handle, &fb->buffer_id); + if (err) { + fb->buffer_id = 0; + return FALSE; + } + } + + return TRUE; +} + +static boolean +kms_surface_flush_frontbuffer(struct native_surface *nsurf) +{ +#ifdef DRM_MODE_FEATURE_DIRTYFB + struct kms_surface *ksurf = kms_surface(nsurf); + struct kms_display *kdpy = ksurf->kdpy; + + /* pbuffer is private */ + if (ksurf->type == KMS_SURFACE_TYPE_PBUFFER) + return TRUE; + + if (ksurf->front_fb.is_passive) + drmModeDirtyFB(kdpy->fd, ksurf->front_fb.buffer_id, NULL, 0); +#endif + + return TRUE; +} + +static boolean +kms_surface_swap_buffers(struct native_surface *nsurf) +{ + struct kms_surface *ksurf = kms_surface(nsurf); + struct kms_crtc *kcrtc = &ksurf->current_crtc; + struct kms_display *kdpy = ksurf->kdpy; + struct kms_framebuffer tmp_fb; + struct pipe_texture *tmp_texture; + int err; + + /* pbuffer is private */ + if (ksurf->type == KMS_SURFACE_TYPE_PBUFFER) + return TRUE; + + if (!ksurf->back_fb.buffer_id) { + if 
(!kms_surface_init_framebuffers(&ksurf->base, TRUE)) + return FALSE; + } + + if (ksurf->is_shown && kcrtc->crtc) { + err = drmModeSetCrtc(kdpy->fd, kcrtc->crtc->crtc_id, + ksurf->back_fb.buffer_id, kcrtc->crtc->x, kcrtc->crtc->y, + kcrtc->connectors, kcrtc->num_connectors, &kcrtc->crtc->mode); + if (err) + return FALSE; + } + + /* swap the buffers */ + tmp_fb = ksurf->front_fb; + ksurf->front_fb = ksurf->back_fb; + ksurf->back_fb = tmp_fb; + + tmp_texture = ksurf->textures[NATIVE_ATTACHMENT_FRONT_LEFT]; + ksurf->textures[NATIVE_ATTACHMENT_FRONT_LEFT] = + ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT]; + ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT] = tmp_texture; + + /* the front/back textures are swapped */ + ksurf->sequence_number++; + + return TRUE; +} + +static void +kms_surface_wait(struct native_surface *nsurf) +{ + /* no-op */ +} + +static void +kms_surface_destroy(struct native_surface *nsurf) +{ + struct kms_surface *ksurf = kms_surface(nsurf); + int i; + + if (ksurf->current_crtc.crtc) + drmModeFreeCrtc(ksurf->current_crtc.crtc); + + if (ksurf->front_fb.buffer_id) + drmModeRmFB(ksurf->kdpy->fd, ksurf->front_fb.buffer_id); + pipe_texture_reference(&ksurf->front_fb.texture, NULL); + + if (ksurf->back_fb.buffer_id) + drmModeRmFB(ksurf->kdpy->fd, ksurf->back_fb.buffer_id); + pipe_texture_reference(&ksurf->back_fb.texture, NULL); + + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { + struct pipe_texture *ptex = ksurf->textures[i]; + pipe_texture_reference(&ptex, NULL); + } + + free(ksurf); +} + +static struct kms_surface * +kms_display_create_surface(struct native_display *ndpy, + enum kms_surface_type type, + const struct native_config *nconf, + uint width, uint height) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct kms_config *kconf = kms_config(nconf); + struct kms_surface *ksurf; + + ksurf = CALLOC_STRUCT(kms_surface); + if (!ksurf) + return NULL; + + ksurf->kdpy = kdpy; + ksurf->type = type; + ksurf->color_format = kconf->base.color_format; + 
ksurf->width = width; + ksurf->height = height; + + ksurf->base.destroy = kms_surface_destroy; + ksurf->base.swap_buffers = kms_surface_swap_buffers; + ksurf->base.flush_frontbuffer = kms_surface_flush_frontbuffer; + ksurf->base.validate = kms_surface_validate; + ksurf->base.wait = kms_surface_wait; + + return ksurf; +} + +/** + * Choose a CRTC that supports all given connectors. + */ +static uint32_t +kms_display_choose_crtc(struct native_display *ndpy, + uint32_t *connectors, int num_connectors) +{ + struct kms_display *kdpy = kms_display(ndpy); + int idx; + + for (idx = 0; idx < kdpy->resources->count_crtcs; idx++) { + boolean found_crtc = TRUE; + int i, j; + + for (i = 0; i < num_connectors; i++) { + drmModeConnectorPtr connector; + int encoder_idx = -1; + + connector = drmModeGetConnector(kdpy->fd, connectors[i]); + if (!connector) { + found_crtc = FALSE; + break; + } + + /* find an encoder the CRTC supports */ + for (j = 0; j < connector->count_encoders; j++) { + drmModeEncoderPtr encoder = + drmModeGetEncoder(kdpy->fd, connector->encoders[j]); + if (encoder->possible_crtcs & (1 << idx)) { + encoder_idx = j; + break; + } + drmModeFreeEncoder(encoder); + } + + drmModeFreeConnector(connector); + if (encoder_idx < 0) { + found_crtc = FALSE; + break; + } + } + + if (found_crtc) + break; + } + + if (idx >= kdpy->resources->count_crtcs) { + _eglLog(_EGL_WARNING, + "failed to find a CRTC that supports the given %d connectors", + num_connectors); + return 0; + } + + return kdpy->resources->crtcs[idx]; +} + +/** + * Remember the original CRTC status and set the CRTC + */ +static boolean +kms_display_set_crtc(struct native_display *ndpy, int crtc_idx, + uint32_t buffer_id, uint32_t x, uint32_t y, + uint32_t *connectors, int num_connectors, + drmModeModeInfoPtr mode) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct kms_crtc *kcrtc = &kdpy->saved_crtcs[crtc_idx]; + uint32_t crtc_id; + int err; + + if (kcrtc->crtc) { + crtc_id = kcrtc->crtc->crtc_id; + } + else 
{ + int count = 0, i; + + /* + * Choose the CRTC once. It could be more dynamic, but let's keep it + * simple for now. + */ + crtc_id = kms_display_choose_crtc(&kdpy->base, + connectors, num_connectors); + + /* save the original CRTC status */ + kcrtc->crtc = drmModeGetCrtc(kdpy->fd, crtc_id); + if (!kcrtc->crtc) + return FALSE; + + for (i = 0; i < kdpy->num_connectors; i++) { + struct kms_connector *kconn = &kdpy->connectors[i]; + drmModeConnectorPtr connector = kconn->connector; + drmModeEncoderPtr encoder; + + encoder = drmModeGetEncoder(kdpy->fd, connector->encoder_id); + if (encoder) { + if (encoder->crtc_id == crtc_id) { + kcrtc->connectors[count++] = connector->connector_id; + if (count >= Elements(kcrtc->connectors)) + break; + } + drmModeFreeEncoder(encoder); + } + } + + kcrtc->num_connectors = count; + } + + err = drmModeSetCrtc(kdpy->fd, crtc_id, buffer_id, x, y, + connectors, num_connectors, mode); + if (err) { + drmModeFreeCrtc(kcrtc->crtc); + kcrtc->crtc = NULL; + kcrtc->num_connectors = 0; + + return FALSE; + } + + return TRUE; +} + +static boolean +kms_display_program(struct native_display *ndpy, int crtc_idx, + struct native_surface *nsurf, uint x, uint y, + const struct native_connector **nconns, int num_nconns, + const struct native_mode *nmode) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct kms_surface *ksurf = kms_surface(nsurf); + const struct kms_mode *kmode = kms_mode(nmode); + uint32_t connector_ids[32]; + uint32_t buffer_id; + drmModeModeInfo mode_tmp, *mode; + int i; + + if (num_nconns > Elements(connector_ids)) { + _eglLog(_EGL_WARNING, "too many connectors (%d)", num_nconns); + num_nconns = Elements(connector_ids); + } + + if (ksurf) { + if (!kms_surface_init_framebuffers(&ksurf->base, FALSE)) + return FALSE; + + buffer_id = ksurf->front_fb.buffer_id; + /* the mode argument of drmModeSetCrtc is not constified */ + mode_tmp = kmode->mode; + mode = &mode_tmp; + } + else { + /* disable the CRTC */ + buffer_id = 0; + mode = 
NULL; + num_nconns = 0; + } + + for (i = 0; i < num_nconns; i++) { + struct kms_connector *kconn = kms_connector(nconns[i]); + connector_ids[i] = kconn->connector->connector_id; + } + + if (!kms_display_set_crtc(&kdpy->base, crtc_idx, buffer_id, x, y, + connector_ids, num_nconns, mode)) { + _eglLog(_EGL_WARNING, "failed to set CRTC %d", crtc_idx); + + return FALSE; + } + + if (kdpy->shown_surfaces[crtc_idx]) + kdpy->shown_surfaces[crtc_idx]->is_shown = FALSE; + kdpy->shown_surfaces[crtc_idx] = ksurf; + + /* remember the settings for buffer swapping */ + if (ksurf) { + uint32_t crtc_id = kdpy->saved_crtcs[crtc_idx].crtc->crtc_id; + struct kms_crtc *kcrtc = &ksurf->current_crtc; + + if (kcrtc->crtc) + drmModeFreeCrtc(kcrtc->crtc); + kcrtc->crtc = drmModeGetCrtc(kdpy->fd, crtc_id); + + assert(num_nconns < Elements(kcrtc->connectors)); + memcpy(kcrtc->connectors, connector_ids, + sizeof(*connector_ids) * num_nconns); + kcrtc->num_connectors = num_nconns; + + ksurf->is_shown = TRUE; + } + + return TRUE; +} + +static const struct native_mode ** +kms_display_get_modes(struct native_display *ndpy, + const struct native_connector *nconn, + int *num_modes) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct kms_connector *kconn = kms_connector(nconn); + const struct native_mode **nmodes_return; + int count, i; + + /* delete old data */ + if (kconn->connector) { + drmModeFreeConnector(kconn->connector); + free(kconn->kms_modes); + + kconn->connector = NULL; + kconn->kms_modes = NULL; + kconn->num_modes = 0; + } + + /* detect again */ + kconn->connector = drmModeGetConnector(kdpy->fd, kconn->connector_id); + if (!kconn->connector) + return NULL; + + count = kconn->connector->count_modes; + kconn->kms_modes = calloc(count, sizeof(*kconn->kms_modes)); + if (!kconn->kms_modes) { + drmModeFreeConnector(kconn->connector); + kconn->connector = NULL; + + return NULL; + } + + for (i = 0; i < count; i++) { + struct kms_mode *kmode = &kconn->kms_modes[i]; + drmModeModeInfoPtr 
mode = &kconn->connector->modes[i]; + + kmode->mode = *mode; + + kmode->base.desc = kmode->mode.name; + kmode->base.width = kmode->mode.hdisplay; + kmode->base.height = kmode->mode.vdisplay; + kmode->base.refresh_rate = kmode->mode.vrefresh / 1000; + } + + nmodes_return = malloc(count * sizeof(*nmodes_return)); + if (nmodes_return) { + for (i = 0; i < count; i++) + nmodes_return[i] = &kconn->kms_modes[i].base; + if (num_modes) + *num_modes = count; + } + + return nmodes_return; +} + +static const struct native_connector ** +kms_display_get_connectors(struct native_display *ndpy, int *num_connectors, + int *num_crtc) +{ + struct kms_display *kdpy = kms_display(ndpy); + const struct native_connector **connectors; + int i; + + if (!kdpy->connectors) { + kdpy->connectors = + calloc(kdpy->resources->count_connectors, sizeof(*kdpy->connectors)); + if (!kdpy->connectors) + return NULL; + + for (i = 0; i < kdpy->resources->count_connectors; i++) { + struct kms_connector *kconn = &kdpy->connectors[i]; + + kconn->connector_id = kdpy->resources->connectors[i]; + /* kconn->connector is allocated when the modes are asked */ + } + + kdpy->num_connectors = kdpy->resources->count_connectors; + } + + connectors = malloc(kdpy->num_connectors * sizeof(*connectors)); + if (connectors) { + for (i = 0; i < kdpy->num_connectors; i++) + connectors[i] = &kdpy->connectors[i].base; + if (num_connectors) + *num_connectors = kdpy->num_connectors; + } + + if (num_crtc) + *num_crtc = kdpy->resources->count_crtcs; + + return connectors; +} + +static struct native_surface * +kms_display_create_scanout_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct kms_surface *ksurf; + + ksurf = kms_display_create_surface(ndpy, + KMS_SURFACE_TYPE_SCANOUT, nconf, width, height); + return &ksurf->base; +} + +static struct native_surface * +kms_display_create_pbuffer_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint 
width, uint height) +{ + struct kms_surface *ksurf; + + ksurf = kms_display_create_surface(ndpy, + KMS_SURFACE_TYPE_PBUFFER, nconf, width, height); + return &ksurf->base; +} + + +static boolean +kms_display_is_format_supported(struct native_display *ndpy, + enum pipe_format fmt, boolean is_color) +{ + return ndpy->screen->is_format_supported(ndpy->screen, + fmt, PIPE_TEXTURE_2D, + (is_color) ? PIPE_TEXTURE_USAGE_RENDER_TARGET : + PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0); +} + +static const struct native_config ** +kms_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct kms_display *kdpy = kms_display(ndpy); + const struct native_config **configs; + + /* first time */ + if (!kdpy->config) { + struct native_config *nconf; + enum pipe_format format; + + kdpy->config = calloc(1, sizeof(*kdpy->config)); + if (!kdpy->config) + return NULL; + + nconf = &kdpy->config->base; + + /* always double-buffered */ + nconf->mode.doubleBufferMode = TRUE; + + format = PIPE_FORMAT_A8R8G8B8_UNORM; + if (!kms_display_is_format_supported(&kdpy->base, format, TRUE)) { + format = PIPE_FORMAT_B8G8R8A8_UNORM; + if (!kms_display_is_format_supported(&kdpy->base, format, TRUE)) + format = PIPE_FORMAT_NONE; + } + if (format == PIPE_FORMAT_NONE) + return NULL; + + nconf->color_format = format; + nconf->mode.redBits = 8; + nconf->mode.greenBits = 8; + nconf->mode.blueBits = 8; + nconf->mode.alphaBits = 8; + nconf->mode.rgbBits = 32; + + format = PIPE_FORMAT_S8Z24_UNORM; + if (!kms_display_is_format_supported(&kdpy->base, format, FALSE)) { + format = PIPE_FORMAT_Z24S8_UNORM; + if (!kms_display_is_format_supported(&kdpy->base, format, FALSE)) + format = PIPE_FORMAT_NONE; + } + if (format != PIPE_FORMAT_NONE) { + nconf->depth_format = format; + nconf->stencil_format = format; + + nconf->mode.depthBits = 24; + nconf->mode.stencilBits = 8; + nconf->mode.haveDepthBuffer = TRUE; + nconf->mode.haveStencilBuffer = TRUE; + } + + nconf->scanout_bit = TRUE; + nconf->mode.drawableType = 
GLX_PBUFFER_BIT; + nconf->mode.swapMethod = GLX_SWAP_EXCHANGE_OML; + + nconf->mode.visualID = 0; + nconf->mode.visualType = EGL_NONE; + + nconf->mode.renderType = GLX_RGBA_BIT; + nconf->mode.rgbMode = TRUE; + nconf->mode.xRenderable = FALSE; + } + + configs = malloc(sizeof(*configs)); + if (configs) { + configs[0] = &kdpy->config->base; + if (num_configs) + *num_configs = 1; + } + + return configs; +} + +static void +kms_display_destroy(struct native_display *ndpy) +{ + struct kms_display *kdpy = kms_display(ndpy); + int i; + + if (kdpy->config) + free(kdpy->config); + + if (kdpy->connectors) { + for (i = 0; i < kdpy->num_connectors; i++) { + struct kms_connector *kconn = &kdpy->connectors[i]; + if (kconn->connector) { + drmModeFreeConnector(kconn->connector); + free(kconn->kms_modes); + } + } + free(kdpy->connectors); + } + + if (kdpy->shown_surfaces) + free(kdpy->shown_surfaces); + + if (kdpy->saved_crtcs) { + for (i = 0; i < kdpy->resources->count_crtcs; i++) { + struct kms_crtc *kcrtc = &kdpy->saved_crtcs[i]; + + if (kcrtc->crtc) { + /* restore crtc */ + drmModeSetCrtc(kdpy->fd, kcrtc->crtc->crtc_id, + kcrtc->crtc->buffer_id, kcrtc->crtc->x, kcrtc->crtc->y, + kcrtc->connectors, kcrtc->num_connectors, + &kcrtc->crtc->mode); + + drmModeFreeCrtc(kcrtc->crtc); + } + } + free(kdpy->saved_crtcs); + } + + if (kdpy->resources) + drmModeFreeResources(kdpy->resources); + + if (kdpy->base.screen) + kdpy->base.screen->destroy(kdpy->base.screen); + + if (kdpy->fd >= 0) + drmClose(kdpy->fd); + + if (kdpy->api) + kdpy->api->destroy(kdpy->api); + free(kdpy); +} + +/** + * Initialize KMS and pipe screen. 
+ */ +static boolean +kms_display_init_screen(struct native_display *ndpy) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct drm_create_screen_arg arg; + int fd; + + fd = drmOpen(kdpy->api->name, NULL); + if (fd < 0) { + _eglLog(_EGL_WARNING, "failed to open DRM device"); + return FALSE; + } + +#if 0 + if (drmSetMaster(fd)) { + _eglLog(_EGL_WARNING, "failed to become DRM master"); + return FALSE; + } +#endif + + memset(&arg, 0, sizeof(arg)); + arg.mode = DRM_CREATE_NORMAL; + kdpy->base.screen = kdpy->api->create_screen(kdpy->api, fd, &arg); + if (!kdpy->base.screen) { + _eglLog(_EGL_WARNING, "failed to create DRM screen"); + drmClose(fd); + return FALSE; + } + + kdpy->fd = fd; + + return TRUE; +} + +static struct native_display_modeset kms_display_modeset = { + .get_connectors = kms_display_get_connectors, + .get_modes = kms_display_get_modes, + .create_scanout_surface = kms_display_create_scanout_surface, + .program = kms_display_program +}; + +static struct native_display * +kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api) +{ + struct kms_display *kdpy; + + kdpy = CALLOC_STRUCT(kms_display); + if (!kdpy) + return NULL; + + kdpy->api = api; + if (!kdpy->api) { + _eglLog(_EGL_WARNING, "failed to create DRM API"); + free(kdpy); + return NULL; + } + + kdpy->fd = -1; + if (!kms_display_init_screen(&kdpy->base)) { + kms_display_destroy(&kdpy->base); + return NULL; + } + + /* resources are fixed, unlike crtc, connector, or encoder */ + kdpy->resources = drmModeGetResources(kdpy->fd); + if (!kdpy->resources) { + kms_display_destroy(&kdpy->base); + return NULL; + } + + kdpy->saved_crtcs = + calloc(kdpy->resources->count_crtcs, sizeof(*kdpy->saved_crtcs)); + if (!kdpy->saved_crtcs) { + kms_display_destroy(&kdpy->base); + return NULL; + } + + kdpy->shown_surfaces = + calloc(kdpy->resources->count_crtcs, sizeof(*kdpy->shown_surfaces)); + if (!kdpy->shown_surfaces) { + kms_display_destroy(&kdpy->base); + return NULL; + } + + kdpy->base.destroy = 
kms_display_destroy; + kdpy->base.get_configs = kms_display_get_configs; + kdpy->base.create_pbuffer_surface = kms_display_create_pbuffer_surface; + + kdpy->base.modeset = &kms_display_modeset; + + return &kdpy->base; +} + +struct native_probe * +native_create_probe(EGLNativeDisplayType dpy) +{ + return NULL; +} + +enum native_probe_result +native_get_probe_result(struct native_probe *nprobe) +{ + return NATIVE_PROBE_UNKNOWN; +} + +/* the api is destroyed with the native display */ +static struct drm_api *drm_api; + +const char * +native_get_name(void) +{ + static char kms_name[32]; + + if (!drm_api) + drm_api = drm_api_create(); + + if (drm_api) + snprintf(kms_name, sizeof(kms_name), "KMS/%s", drm_api->name); + else + snprintf(kms_name, sizeof(kms_name), "KMS"); + + return kms_name; +} + +struct native_display * +native_create_display(EGLNativeDisplayType dpy) +{ + struct native_display *ndpy = NULL; + + if (!drm_api) + drm_api = drm_api_create(); + + if (drm_api) + ndpy = kms_create_display(dpy, drm_api); + + return ndpy; +} diff --git a/src/gallium/state_trackers/egl/kms/native_kms.h b/src/gallium/state_trackers/egl/kms/native_kms.h new file mode 100644 index 00000000000..095186e3cf3 --- /dev/null +++ b/src/gallium/state_trackers/egl/kms/native_kms.h @@ -0,0 +1,139 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NATIVE_KMS_H_ +#define _NATIVE_KMS_H_ + +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "pipe/p_compiler.h" +#include "util/u_format.h" +#include "pipe/p_state.h" +#include "state_tracker/drm_api.h" + +#include "common/native.h" + +enum kms_surface_type { + KMS_SURFACE_TYPE_PBUFFER, + KMS_SURFACE_TYPE_SCANOUT +}; + +struct kms_config; +struct kms_connector; +struct kms_mode; + +struct kms_crtc { + drmModeCrtcPtr crtc; + uint32_t connectors[32]; + int num_connectors; +}; + +struct kms_display { + struct native_display base; + + int fd; + struct drm_api *api; + drmModeResPtr resources; + struct kms_config *config; + + struct kms_connector *connectors; + int num_connectors; + + struct kms_surface **shown_surfaces; + /* save the original settings of the CRTCs */ + struct kms_crtc *saved_crtcs; +}; + +struct kms_framebuffer { + struct pipe_texture *texture; + boolean is_passive; + + uint32_t buffer_id; +}; + +struct kms_surface { + struct native_surface base; + enum kms_surface_type type; + enum pipe_format color_format; + struct kms_display *kdpy; + int width, height; + + struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; + unsigned int sequence_number; + struct kms_framebuffer front_fb, back_fb; + + boolean is_shown; + struct kms_crtc current_crtc; +}; + +struct kms_config { + struct native_config base; +}; + +struct kms_connector { + struct native_connector base; + + uint32_t connector_id; + drmModeConnectorPtr connector; + struct kms_mode *kms_modes; + int num_modes; +}; + 
+struct kms_mode { + struct native_mode base; + drmModeModeInfo mode; +}; + +static INLINE struct kms_display * +kms_display(const struct native_display *ndpy) +{ + return (struct kms_display *) ndpy; +} + +static INLINE struct kms_surface * +kms_surface(const struct native_surface *nsurf) +{ + return (struct kms_surface *) nsurf; +} + +static INLINE struct kms_config * +kms_config(const struct native_config *nconf) +{ + return (struct kms_config *) nconf; +} + +static INLINE struct kms_connector * +kms_connector(const struct native_connector *nconn) +{ + return (struct kms_connector *) nconn; +} + +static INLINE struct kms_mode * +kms_mode(const struct native_mode *nmode) +{ + return (struct kms_mode *) nmode; +} + +#endif /* _NATIVE_KMS_H_ */ diff --git a/src/gallium/state_trackers/egl/x11/glxinit.c b/src/gallium/state_trackers/egl/x11/glxinit.c new file mode 100644 index 00000000000..1ed2afd3458 --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/glxinit.c @@ -0,0 +1,682 @@ +/** + * GLX initialization. Code based on glxext.c, glx_query.c, and + * glcontextmodes.c under src/glx/. The major difference is that DRI + * related code is stripped out. + * + * If the maintenance of this file takes too much time, we should consider + * refactoring glxext.c. 
+ */ + +#include <assert.h> +#include <X11/Xlib.h> +#include <X11/Xproto.h> +#include <X11/extensions/Xext.h> +#include <X11/extensions/extutil.h> +#include <sys/time.h> + +#include "glxinit.h" + +typedef struct GLXGenericGetString +{ + CARD8 reqType; + CARD8 glxCode; + CARD16 length B16; + CARD32 for_whom B32; + CARD32 name B32; +} xGLXGenericGetStringReq; + +#define sz_xGLXGenericGetStringReq 12 +#define X_GLXGenericGetString 0 + +/* Extension required boiler plate */ + +static char *__glXExtensionName = GLX_EXTENSION_NAME; +static XExtensionInfo *__glXExtensionInfo = NULL; + +static int +__glXCloseDisplay(Display * dpy, XExtCodes * codes) +{ + return XextRemoveDisplay(__glXExtensionInfo, dpy); +} + +static /* const */ XExtensionHooks __glXExtensionHooks = { + NULL, /* create_gc */ + NULL, /* copy_gc */ + NULL, /* flush_gc */ + NULL, /* free_gc */ + NULL, /* create_font */ + NULL, /* free_font */ + __glXCloseDisplay, /* close_display */ + NULL, /* wire_to_event */ + NULL, /* event_to_wire */ + NULL, /* error */ + NULL, /* error_string */ +}; + +XEXT_GENERATE_FIND_DISPLAY(__glXFindDisplay, __glXExtensionInfo, + __glXExtensionName, &__glXExtensionHooks, + __GLX_NUMBER_EVENTS, NULL) + +static GLint +_gl_convert_from_x_visual_type(int visualType) +{ +#define NUM_VISUAL_TYPES 6 + static const int glx_visual_types[NUM_VISUAL_TYPES] = { + GLX_STATIC_GRAY, GLX_GRAY_SCALE, + GLX_STATIC_COLOR, GLX_PSEUDO_COLOR, + GLX_TRUE_COLOR, GLX_DIRECT_COLOR + }; + + return ((unsigned) visualType < NUM_VISUAL_TYPES) + ? glx_visual_types[visualType] : GLX_NONE; +} + +static __GLcontextModes * +_gl_context_modes_create(unsigned count, size_t minimum_size) +{ + const size_t size = (minimum_size > sizeof(__GLcontextModes)) + ? 
minimum_size : sizeof(__GLcontextModes); + __GLcontextModes *base = NULL; + __GLcontextModes **next; + unsigned i; + + next = &base; + for (i = 0; i < count; i++) { + *next = (__GLcontextModes *) Xmalloc(size); + if (*next == NULL) { + _gl_context_modes_destroy(base); + base = NULL; + break; + } + + memset(*next, 0, size); + (*next)->visualID = GLX_DONT_CARE; + (*next)->visualType = GLX_DONT_CARE; + (*next)->visualRating = GLX_NONE; + (*next)->transparentPixel = GLX_NONE; + (*next)->transparentRed = GLX_DONT_CARE; + (*next)->transparentGreen = GLX_DONT_CARE; + (*next)->transparentBlue = GLX_DONT_CARE; + (*next)->transparentAlpha = GLX_DONT_CARE; + (*next)->transparentIndex = GLX_DONT_CARE; + (*next)->xRenderable = GLX_DONT_CARE; + (*next)->fbconfigID = GLX_DONT_CARE; + (*next)->swapMethod = GLX_SWAP_UNDEFINED_OML; + (*next)->bindToTextureRgb = GLX_DONT_CARE; + (*next)->bindToTextureRgba = GLX_DONT_CARE; + (*next)->bindToMipmapTexture = GLX_DONT_CARE; + (*next)->bindToTextureTargets = GLX_DONT_CARE; + (*next)->yInverted = GLX_DONT_CARE; + + next = &((*next)->next); + } + + return base; +} + +_X_HIDDEN void +_gl_context_modes_destroy(__GLcontextModes * modes) +{ + while (modes != NULL) { + __GLcontextModes *const next = modes->next; + + Xfree(modes); + modes = next; + } +} + +_X_HIDDEN char * +__glXQueryServerString(Display * dpy, int opcode, CARD32 screen, CARD32 name) +{ + xGLXGenericGetStringReq *req; + xGLXSingleReply reply; + int length; + int numbytes; + char *buf; + CARD32 for_whom = screen; + CARD32 glxCode = X_GLXQueryServerString; + + + LockDisplay(dpy); + + + /* All of the GLX protocol requests for getting a string from the server + * look the same. The exact meaning of the for_whom field is usually + * either the screen number (for glXQueryServerString) or the context tag + * (for GLXSingle). 
+ */ + + GetReq(GLXGenericGetString, req); + req->reqType = opcode; + req->glxCode = glxCode; + req->for_whom = for_whom; + req->name = name; + + _XReply(dpy, (xReply *) & reply, 0, False); + + length = reply.length * 4; + numbytes = reply.size; + + buf = (char *) Xmalloc(numbytes); + if (buf != NULL) { + _XRead(dpy, buf, numbytes); + length -= numbytes; + } + + _XEatData(dpy, length); + + UnlockDisplay(dpy); + SyncHandle(); + + return buf; +} + +/************************************************************************/ +/* +** Free the per screen configs data as well as the array of +** __glXScreenConfigs. +*/ +static void +FreeScreenConfigs(__GLXdisplayPrivate * priv) +{ + __GLXscreenConfigs *psc; + GLint i, screens; + + /* Free screen configuration information */ + psc = priv->screenConfigs; + screens = ScreenCount(priv->dpy); + for (i = 0; i < screens; i++, psc++) { + if (psc->configs) { + _gl_context_modes_destroy(psc->configs); + psc->configs = NULL; /* NOTE: just for paranoia */ + } + if (psc->visuals) { + _gl_context_modes_destroy(psc->visuals); + psc->visuals = NULL; /* NOTE: just for paranoia */ + } + Xfree((char *) psc->serverGLXexts); + } + XFree((char *) priv->screenConfigs); + priv->screenConfigs = NULL; +} + +/* +** Release the private memory referred to in a display private +** structure. The caller will free the extension structure. 
+*/ +static int +__glXFreeDisplayPrivate(XExtData * extension) +{ + __GLXdisplayPrivate *priv; + + priv = (__GLXdisplayPrivate *) extension->private_data; + FreeScreenConfigs(priv); + if (priv->serverGLXvendor) { + Xfree((char *) priv->serverGLXvendor); + priv->serverGLXvendor = 0x0; /* to protect against double free's */ + } + if (priv->serverGLXversion) { + Xfree((char *) priv->serverGLXversion); + priv->serverGLXversion = 0x0; /* to protect against double free's */ + } + + Xfree((char *) priv); + return 0; +} + +/************************************************************************/ + +/* +** Query the version of the GLX extension. This procedure works even if +** the client extension is not completely set up. +*/ +static Bool +QueryVersion(Display * dpy, int opcode, int *major, int *minor) +{ + xGLXQueryVersionReq *req; + xGLXQueryVersionReply reply; + + /* Send the glXQueryVersion request */ + LockDisplay(dpy); + GetReq(GLXQueryVersion, req); + req->reqType = opcode; + req->glxCode = X_GLXQueryVersion; + req->majorVersion = GLX_MAJOR_VERSION; + req->minorVersion = GLX_MINOR_VERSION; + _XReply(dpy, (xReply *) & reply, 0, False); + UnlockDisplay(dpy); + SyncHandle(); + + if (reply.majorVersion != GLX_MAJOR_VERSION) { + /* + ** The server does not support the same major release as this + ** client. 
+ */ + return GL_FALSE; + } + *major = reply.majorVersion; + *minor = min(reply.minorVersion, GLX_MINOR_VERSION); + return GL_TRUE; +} + +_X_HIDDEN void +__glXInitializeVisualConfigFromTags(__GLcontextModes * config, int count, + const INT32 * bp, Bool tagged_only, + Bool fbconfig_style_tags) +{ + int i; + + if (!tagged_only) { + /* Copy in the first set of properties */ + config->visualID = *bp++; + + config->visualType = _gl_convert_from_x_visual_type(*bp++); + + config->rgbMode = *bp++; + + config->redBits = *bp++; + config->greenBits = *bp++; + config->blueBits = *bp++; + config->alphaBits = *bp++; + config->accumRedBits = *bp++; + config->accumGreenBits = *bp++; + config->accumBlueBits = *bp++; + config->accumAlphaBits = *bp++; + + config->doubleBufferMode = *bp++; + config->stereoMode = *bp++; + + config->rgbBits = *bp++; + config->depthBits = *bp++; + config->stencilBits = *bp++; + config->numAuxBuffers = *bp++; + config->level = *bp++; + + count -= __GLX_MIN_CONFIG_PROPS; + } + + /* + ** Additional properties may be in a list at the end + ** of the reply. They are in pairs of property type + ** and property value. + */ + +#define FETCH_OR_SET(tag) \ + config-> tag = ( fbconfig_style_tags ) ? 
*bp++ : 1 + + for (i = 0; i < count; i += 2) { + switch (*bp++) { + case GLX_RGBA: + FETCH_OR_SET(rgbMode); + break; + case GLX_BUFFER_SIZE: + config->rgbBits = *bp++; + break; + case GLX_LEVEL: + config->level = *bp++; + break; + case GLX_DOUBLEBUFFER: + FETCH_OR_SET(doubleBufferMode); + break; + case GLX_STEREO: + FETCH_OR_SET(stereoMode); + break; + case GLX_AUX_BUFFERS: + config->numAuxBuffers = *bp++; + break; + case GLX_RED_SIZE: + config->redBits = *bp++; + break; + case GLX_GREEN_SIZE: + config->greenBits = *bp++; + break; + case GLX_BLUE_SIZE: + config->blueBits = *bp++; + break; + case GLX_ALPHA_SIZE: + config->alphaBits = *bp++; + break; + case GLX_DEPTH_SIZE: + config->depthBits = *bp++; + break; + case GLX_STENCIL_SIZE: + config->stencilBits = *bp++; + break; + case GLX_ACCUM_RED_SIZE: + config->accumRedBits = *bp++; + break; + case GLX_ACCUM_GREEN_SIZE: + config->accumGreenBits = *bp++; + break; + case GLX_ACCUM_BLUE_SIZE: + config->accumBlueBits = *bp++; + break; + case GLX_ACCUM_ALPHA_SIZE: + config->accumAlphaBits = *bp++; + break; + case GLX_VISUAL_CAVEAT_EXT: + config->visualRating = *bp++; + break; + case GLX_X_VISUAL_TYPE: + config->visualType = *bp++; + break; + case GLX_TRANSPARENT_TYPE: + config->transparentPixel = *bp++; + break; + case GLX_TRANSPARENT_INDEX_VALUE: + config->transparentIndex = *bp++; + break; + case GLX_TRANSPARENT_RED_VALUE: + config->transparentRed = *bp++; + break; + case GLX_TRANSPARENT_GREEN_VALUE: + config->transparentGreen = *bp++; + break; + case GLX_TRANSPARENT_BLUE_VALUE: + config->transparentBlue = *bp++; + break; + case GLX_TRANSPARENT_ALPHA_VALUE: + config->transparentAlpha = *bp++; + break; + case GLX_VISUAL_ID: + config->visualID = *bp++; + break; + case GLX_DRAWABLE_TYPE: + config->drawableType = *bp++; + break; + case GLX_RENDER_TYPE: + config->renderType = *bp++; + break; + case GLX_X_RENDERABLE: + config->xRenderable = *bp++; + break; + case GLX_FBCONFIG_ID: + config->fbconfigID = *bp++; + break; + case 
GLX_MAX_PBUFFER_WIDTH: + config->maxPbufferWidth = *bp++; + break; + case GLX_MAX_PBUFFER_HEIGHT: + config->maxPbufferHeight = *bp++; + break; + case GLX_MAX_PBUFFER_PIXELS: + config->maxPbufferPixels = *bp++; + break; + case GLX_OPTIMAL_PBUFFER_WIDTH_SGIX: + config->optimalPbufferWidth = *bp++; + break; + case GLX_OPTIMAL_PBUFFER_HEIGHT_SGIX: + config->optimalPbufferHeight = *bp++; + break; + case GLX_VISUAL_SELECT_GROUP_SGIX: + config->visualSelectGroup = *bp++; + break; + case GLX_SWAP_METHOD_OML: + config->swapMethod = *bp++; + break; + case GLX_SAMPLE_BUFFERS_SGIS: + config->sampleBuffers = *bp++; + break; + case GLX_SAMPLES_SGIS: + config->samples = *bp++; + break; + case GLX_BIND_TO_TEXTURE_RGB_EXT: + config->bindToTextureRgb = *bp++; + break; + case GLX_BIND_TO_TEXTURE_RGBA_EXT: + config->bindToTextureRgba = *bp++; + break; + case GLX_BIND_TO_MIPMAP_TEXTURE_EXT: + config->bindToMipmapTexture = *bp++; + break; + case GLX_BIND_TO_TEXTURE_TARGETS_EXT: + config->bindToTextureTargets = *bp++; + break; + case GLX_Y_INVERTED_EXT: + config->yInverted = *bp++; + break; + case None: + i = count; + break; + default: + break; + } + } + + config->renderType = + (config->rgbMode) ? GLX_RGBA_BIT : GLX_COLOR_INDEX_BIT; + + config->haveAccumBuffer = ((config->accumRedBits + + config->accumGreenBits + + config->accumBlueBits + + config->accumAlphaBits) > 0); + config->haveDepthBuffer = (config->depthBits > 0); + config->haveStencilBuffer = (config->stencilBits > 0); +} + +static __GLcontextModes * +createConfigsFromProperties(Display * dpy, int nvisuals, int nprops, + int screen, GLboolean tagged_only) +{ + INT32 buf[__GLX_TOTAL_CONFIG], *props; + unsigned prop_size; + __GLcontextModes *modes, *m; + int i; + + if (nprops == 0) + return NULL; + + /* FIXME: Is the __GLX_MIN_CONFIG_PROPS test correct for FBconfigs? 
*/ + + /* Check number of properties */ + if (nprops < __GLX_MIN_CONFIG_PROPS || nprops > __GLX_MAX_CONFIG_PROPS) + return NULL; + + /* Allocate memory for our config structure */ + modes = _gl_context_modes_create(nvisuals, sizeof(__GLcontextModes)); + if (!modes) + return NULL; + + prop_size = nprops * __GLX_SIZE_INT32; + if (prop_size <= sizeof(buf)) + props = buf; + else + props = Xmalloc(prop_size); + + /* Read each config structure and convert it into our format */ + m = modes; + for (i = 0; i < nvisuals; i++) { + _XRead(dpy, (char *) props, prop_size); + /* Older X servers don't send this so we default it here. */ + m->drawableType = GLX_WINDOW_BIT; + __glXInitializeVisualConfigFromTags(m, nprops, props, + tagged_only, GL_TRUE); + m->screen = screen; + m = m->next; + } + + if (props != buf) + Xfree(props); + + return modes; +} + +static GLboolean +getVisualConfigs(Display * dpy, __GLXdisplayPrivate * priv, int screen) +{ + xGLXGetVisualConfigsReq *req; + __GLXscreenConfigs *psc; + xGLXGetVisualConfigsReply reply; + + LockDisplay(dpy); + + psc = priv->screenConfigs + screen; + psc->visuals = NULL; + GetReq(GLXGetVisualConfigs, req); + req->reqType = priv->majorOpcode; + req->glxCode = X_GLXGetVisualConfigs; + req->screen = screen; + + if (!_XReply(dpy, (xReply *) & reply, 0, False)) + goto out; + + psc->visuals = createConfigsFromProperties(dpy, + reply.numVisuals, + reply.numProps, + screen, GL_FALSE); + + out: + UnlockDisplay(dpy); + return psc->visuals != NULL; +} + +static GLboolean +getFBConfigs(Display * dpy, __GLXdisplayPrivate * priv, int screen) +{ + xGLXGetFBConfigsReq *fb_req; + xGLXGetFBConfigsSGIXReq *sgi_req; + xGLXVendorPrivateWithReplyReq *vpreq; + xGLXGetFBConfigsReply reply; + __GLXscreenConfigs *psc; + + psc = priv->screenConfigs + screen; + psc->serverGLXexts = + __glXQueryServerString(dpy, priv->majorOpcode, screen, GLX_EXTENSIONS); + + LockDisplay(dpy); + + psc->configs = NULL; + if (atof(priv->serverGLXversion) >= 1.3) { + 
GetReq(GLXGetFBConfigs, fb_req); + fb_req->reqType = priv->majorOpcode; + fb_req->glxCode = X_GLXGetFBConfigs; + fb_req->screen = screen; + } + else if (strstr(psc->serverGLXexts, "GLX_SGIX_fbconfig") != NULL) { + GetReqExtra(GLXVendorPrivateWithReply, + sz_xGLXGetFBConfigsSGIXReq + + sz_xGLXVendorPrivateWithReplyReq, vpreq); + sgi_req = (xGLXGetFBConfigsSGIXReq *) vpreq; + sgi_req->reqType = priv->majorOpcode; + sgi_req->glxCode = X_GLXVendorPrivateWithReply; + sgi_req->vendorCode = X_GLXvop_GetFBConfigsSGIX; + sgi_req->screen = screen; + } + else + goto out; + + if (!_XReply(dpy, (xReply *) & reply, 0, False)) + goto out; + + psc->configs = createConfigsFromProperties(dpy, + reply.numFBConfigs, + reply.numAttribs * 2, + screen, GL_TRUE); + + out: + UnlockDisplay(dpy); + return psc->configs != NULL; +} + +static GLboolean +AllocAndFetchScreenConfigs(Display * dpy, __GLXdisplayPrivate * priv) +{ + __GLXscreenConfigs *psc; + GLint i, screens; + + /* + ** First allocate memory for the array of per screen configs. + */ + screens = ScreenCount(dpy); + psc = (__GLXscreenConfigs *) Xmalloc(screens * sizeof(__GLXscreenConfigs)); + if (!psc) { + return GL_FALSE; + } + memset(psc, 0, screens * sizeof(__GLXscreenConfigs)); + priv->screenConfigs = psc; + + priv->serverGLXversion = + __glXQueryServerString(dpy, priv->majorOpcode, 0, GLX_VERSION); + if (priv->serverGLXversion == NULL) { + FreeScreenConfigs(priv); + return GL_FALSE; + } + + for (i = 0; i < screens; i++, psc++) { + getFBConfigs(dpy, priv, i); + getVisualConfigs(dpy, priv, i); + psc->scr = i; + psc->dpy = dpy; + } + + SyncHandle(); + + return GL_TRUE; +} + +_X_HIDDEN __GLXdisplayPrivate * +__glXInitialize(Display * dpy) +{ + XExtDisplayInfo *info = __glXFindDisplay(dpy); + XExtData **privList, *private, *found; + __GLXdisplayPrivate *dpyPriv; + XEDataObject dataObj; + int major, minor; + + if (!XextHasExtension(info)) + return NULL; + + /* See if a display private already exists. 
If so, return it */ + dataObj.display = dpy; + privList = XEHeadOfExtensionList(dataObj); + found = XFindOnExtensionList(privList, info->codes->extension); + if (found) + return (__GLXdisplayPrivate *) found->private_data; + + /* See if the versions are compatible */ + if (!QueryVersion(dpy, info->codes->major_opcode, &major, &minor)) + return NULL; + + /* + ** Allocate memory for all the pieces needed for this buffer. + */ + private = (XExtData *) Xmalloc(sizeof(XExtData)); + if (!private) + return NULL; + dpyPriv = (__GLXdisplayPrivate *) Xcalloc(1, sizeof(__GLXdisplayPrivate)); + if (!dpyPriv) { + Xfree(private); + return NULL; + } + + /* + ** Init the display private and then read in the screen config + ** structures from the server. + */ + dpyPriv->majorOpcode = info->codes->major_opcode; + dpyPriv->majorVersion = major; + dpyPriv->minorVersion = minor; + dpyPriv->dpy = dpy; + + dpyPriv->serverGLXvendor = NULL; + dpyPriv->serverGLXversion = NULL; + + if (!AllocAndFetchScreenConfigs(dpy, dpyPriv)) { + Xfree(dpyPriv); + Xfree(private); + return NULL; + } + + /* + ** Fill in the private structure. This is the actual structure that + ** hangs off of the Display structure. Our private structure is + ** referred to by this structure. Got that? 
+ */ + private->number = info->codes->extension; + private->next = 0; + private->free_private = __glXFreeDisplayPrivate; + private->private_data = (char *) dpyPriv; + XAddToExtensionList(privList, private); + + return dpyPriv; +} diff --git a/src/gallium/state_trackers/egl/x11/glxinit.h b/src/gallium/state_trackers/egl/x11/glxinit.h new file mode 100644 index 00000000000..1cc7c460fe2 --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/glxinit.h @@ -0,0 +1,11 @@ +#ifndef GLXINIT_INCLUDED +#define GLXINIT_INCLUDED + +#include <X11/Xlib.h> +#include "glxclient.h" + +/* this is used by DRI loaders */ +extern void +_gl_context_modes_destroy(__GLcontextModes * modes); + +#endif /* GLXINIT_INCLUDED */ diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c new file mode 100644 index 00000000000..5f2fd412604 --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/native_dri2.c @@ -0,0 +1,693 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "pipe/p_compiler.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "state_tracker/drm_api.h" +#include "egllog.h" + +#include "native_x11.h" +#include "x11_screen.h" + +enum dri2_surface_type { + DRI2_SURFACE_TYPE_WINDOW, + DRI2_SURFACE_TYPE_PIXMAP, + DRI2_SURFACE_TYPE_PBUFFER +}; + +struct dri2_display { + struct native_display base; + Display *dpy; + boolean own_dpy; + + struct drm_api *api; + struct x11_screen *xscr; + int xscr_number; + + struct dri2_config *configs; + int num_configs; +}; + +struct dri2_surface { + struct native_surface base; + Drawable drawable; + enum dri2_surface_type type; + enum pipe_format color_format; + struct dri2_display *dri2dpy; + + struct pipe_texture *pbuffer_textures[NUM_NATIVE_ATTACHMENTS]; + boolean have_back, have_fake; + int width, height; + unsigned int sequence_number; +}; + +struct dri2_config { + struct native_config base; +}; + +static INLINE struct dri2_display * +dri2_display(const struct native_display *ndpy) +{ + return (struct dri2_display *) ndpy; +} + +static INLINE struct dri2_surface * +dri2_surface(const struct native_surface *nsurf) +{ + return (struct dri2_surface *) nsurf; +} + +static INLINE struct dri2_config * +dri2_config(const struct native_config *nconf) +{ + return (struct dri2_config *) nconf; +} + +static boolean +dri2_surface_flush_frontbuffer(struct native_surface *nsurf) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + struct dri2_display *dri2dpy = dri2surf->dri2dpy; + + /* pbuffer is private */ + if (dri2surf->type == DRI2_SURFACE_TYPE_PBUFFER) + return TRUE; + + /* 
copy to real front buffer */ + if (dri2surf->have_fake) + x11_drawable_copy_buffers(dri2dpy->xscr, dri2surf->drawable, + 0, 0, dri2surf->width, dri2surf->height, + DRI2BufferFakeFrontLeft, DRI2BufferFrontLeft); + + return TRUE; +} + +static boolean +dri2_surface_swap_buffers(struct native_surface *nsurf) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + struct dri2_display *dri2dpy = dri2surf->dri2dpy; + + /* pbuffer is private */ + if (dri2surf->type == DRI2_SURFACE_TYPE_PBUFFER) + return TRUE; + + /* copy to front buffer */ + if (dri2surf->have_back) + x11_drawable_copy_buffers(dri2dpy->xscr, dri2surf->drawable, + 0, 0, dri2surf->width, dri2surf->height, + DRI2BufferBackLeft, DRI2BufferFrontLeft); + + /* and update fake front buffer */ + if (dri2surf->have_fake) + x11_drawable_copy_buffers(dri2dpy->xscr, dri2surf->drawable, + 0, 0, dri2surf->width, dri2surf->height, + DRI2BufferFrontLeft, DRI2BufferFakeFrontLeft); + + return TRUE; +} + +static boolean +dri2_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, + int *width, int *height) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + struct dri2_display *dri2dpy = dri2surf->dri2dpy; + unsigned int dri2atts[NUM_NATIVE_ATTACHMENTS]; + struct pipe_texture templ; + struct x11_drawable_buffer *xbufs; + int num_ins, num_outs, att, i; + + if (attachment_mask) { + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.last_level = 0; + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + templ.depth0 = 1; + templ.format = dri2surf->color_format; + templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; + + if (textures) + memset(textures, 0, sizeof(*textures) * NUM_NATIVE_ATTACHMENTS); + } + + /* create textures for pbuffer */ + if (dri2surf->type == DRI2_SURFACE_TYPE_PBUFFER) { + struct pipe_screen *screen = dri2dpy->base.screen; + + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + struct 
pipe_texture *ptex = dri2surf->pbuffer_textures[att]; + + /* delay the allocation */ + if (!native_attachment_mask_test(attachment_mask, att)) + continue; + + if (!ptex) { + ptex = screen->texture_create(screen, &templ); + dri2surf->pbuffer_textures[att] = ptex; + } + + if (textures) + pipe_texture_reference(&textures[att], ptex); + } + + if (seq_num) + *seq_num = dri2surf->sequence_number; + if (width) + *width = dri2surf->width; + if (height) + *height = dri2surf->height; + + return TRUE; + } + + /* prepare the attachments */ + num_ins = 0; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + if (native_attachment_mask_test(attachment_mask, att)) { + unsigned int dri2att; + + switch (att) { + case NATIVE_ATTACHMENT_FRONT_LEFT: + dri2att = DRI2BufferFrontLeft; + break; + case NATIVE_ATTACHMENT_BACK_LEFT: + dri2att = DRI2BufferBackLeft; + break; + case NATIVE_ATTACHMENT_FRONT_RIGHT: + dri2att = DRI2BufferFrontRight; + break; + case NATIVE_ATTACHMENT_BACK_RIGHT: + dri2att = DRI2BufferBackRight; + break; + default: + assert(0); + dri2att = 0; + break; + } + + dri2atts[num_ins] = dri2att; + num_ins++; + } + } + + dri2surf->have_back = FALSE; + dri2surf->have_fake = FALSE; + + /* remember old geometry */ + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + + xbufs = x11_drawable_get_buffers(dri2dpy->xscr, dri2surf->drawable, + &dri2surf->width, &dri2surf->height, + dri2atts, FALSE, num_ins, &num_outs); + if (!xbufs) + return FALSE; + + if (templ.width0 != dri2surf->width || templ.height0 != dri2surf->height) { + /* are there cases where the buffers change and the geometry doesn't? 
*/ + dri2surf->sequence_number++; + + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + } + + for (i = 0; i < num_outs; i++) { + struct x11_drawable_buffer *xbuf = &xbufs[i]; + const char *desc; + enum native_attachment natt; + + switch (xbuf->attachment) { + case DRI2BufferFrontLeft: + natt = NATIVE_ATTACHMENT_FRONT_LEFT; + desc = "DRI2 Front Buffer"; + break; + case DRI2BufferFakeFrontLeft: + natt = NATIVE_ATTACHMENT_FRONT_LEFT; + desc = "DRI2 Fake Front Buffer"; + dri2surf->have_fake = TRUE; + break; + case DRI2BufferBackLeft: + natt = NATIVE_ATTACHMENT_BACK_LEFT; + desc = "DRI2 Back Buffer"; + dri2surf->have_back = TRUE; + break; + default: + desc = NULL; + break; + } + + if (!desc || !native_attachment_mask_test(attachment_mask, natt) || + (textures && textures[natt])) { + if (!desc) + _eglLog(_EGL_WARNING, "unknown buffer %d", xbuf->attachment); + else if (!native_attachment_mask_test(attachment_mask, natt)) + _eglLog(_EGL_WARNING, "unexpected buffer %d", xbuf->attachment); + else + _eglLog(_EGL_WARNING, "both real and fake front buffers are listed"); + continue; + } + + if (textures) { + struct pipe_texture *ptex = + dri2dpy->api->texture_from_shared_handle(dri2dpy->api, + dri2dpy->base.screen, &templ, + desc, xbuf->pitch, xbuf->name); + if (ptex) { + /* the caller owns the textures */ + textures[natt] = ptex; + } + } + } + + free(xbufs); + + if (seq_num) + *seq_num = dri2surf->sequence_number; + if (width) + *width = dri2surf->width; + if (height) + *height = dri2surf->height; + + return TRUE; +} + +static void +dri2_surface_wait(struct native_surface *nsurf) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + struct dri2_display *dri2dpy = dri2surf->dri2dpy; + + if (dri2surf->have_fake) { + x11_drawable_copy_buffers(dri2dpy->xscr, dri2surf->drawable, + 0, 0, dri2surf->width, dri2surf->height, + DRI2BufferFrontLeft, DRI2BufferFakeFrontLeft); + } +} + +static void +dri2_surface_destroy(struct native_surface *nsurf) +{ + struct 
dri2_surface *dri2surf = dri2_surface(nsurf); + int i; + + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { + struct pipe_texture *ptex = dri2surf->pbuffer_textures[i]; + pipe_texture_reference(&ptex, NULL); + } + + if (dri2surf->drawable) + x11_drawable_enable_dri2(dri2surf->dri2dpy->xscr, + dri2surf->drawable, FALSE); + free(dri2surf); +} + +static struct dri2_surface * +dri2_display_create_surface(struct native_display *ndpy, + enum dri2_surface_type type, + Drawable drawable, + const struct native_config *nconf) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + struct dri2_config *dri2conf = dri2_config(nconf); + struct dri2_surface *dri2surf; + + dri2surf = CALLOC_STRUCT(dri2_surface); + if (!dri2surf) + return NULL; + + if (drawable) + x11_drawable_enable_dri2(dri2dpy->xscr, drawable, TRUE); + + dri2surf->dri2dpy = dri2dpy; + dri2surf->type = type; + dri2surf->drawable = drawable; + dri2surf->color_format = dri2conf->base.color_format; + + dri2surf->base.destroy = dri2_surface_destroy; + dri2surf->base.swap_buffers = dri2_surface_swap_buffers; + dri2surf->base.flush_frontbuffer = dri2_surface_flush_frontbuffer; + dri2surf->base.validate = dri2_surface_validate; + dri2surf->base.wait = dri2_surface_wait; + + return dri2surf; +} + +static struct native_surface * +dri2_display_create_window_surface(struct native_display *ndpy, + EGLNativeWindowType win, + const struct native_config *nconf) +{ + struct dri2_surface *dri2surf; + + dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_WINDOW, + (Drawable) win, nconf); + return (dri2surf) ? &dri2surf->base : NULL; +} + +static struct native_surface * +dri2_display_create_pixmap_surface(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct dri2_surface *dri2surf; + + dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_PIXMAP, + (Drawable) pix, nconf); + return (dri2surf) ? 
&dri2surf->base : NULL; +} + +static struct native_surface * +dri2_display_create_pbuffer_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct dri2_surface *dri2surf; + + dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_PBUFFER, + (Drawable) None, nconf); + if (dri2surf) { + dri2surf->width = width; + dri2surf->height = height; + } + return (dri2surf) ? &dri2surf->base : NULL; +} + +static int +choose_color_format(const __GLcontextModes *mode, enum pipe_format formats[32]) +{ + int count = 0; + + switch (mode->rgbBits) { + case 32: + formats[count++] = PIPE_FORMAT_A8R8G8B8_UNORM; + formats[count++] = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + case 24: + formats[count++] = PIPE_FORMAT_X8R8G8B8_UNORM; + formats[count++] = PIPE_FORMAT_B8G8R8X8_UNORM; + formats[count++] = PIPE_FORMAT_A8R8G8B8_UNORM; + formats[count++] = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + case 16: + formats[count++] = PIPE_FORMAT_R5G6B5_UNORM; + break; + default: + break; + } + + return count; +} + +static int +choose_depth_stencil_format(const __GLcontextModes *mode, + enum pipe_format formats[32]) +{ + int count = 0; + + switch (mode->depthBits) { + case 32: + formats[count++] = PIPE_FORMAT_Z32_UNORM; + break; + case 24: + if (mode->stencilBits) { + formats[count++] = PIPE_FORMAT_S8Z24_UNORM; + formats[count++] = PIPE_FORMAT_Z24S8_UNORM; + } + else { + formats[count++] = PIPE_FORMAT_X8Z24_UNORM; + formats[count++] = PIPE_FORMAT_Z24X8_UNORM; + } + break; + case 16: + formats[count++] = PIPE_FORMAT_Z16_UNORM; + break; + default: + break; + } + + return count; +} + +static boolean +is_format_supported(struct pipe_screen *screen, + enum pipe_format fmt, boolean is_color) +{ + return screen->is_format_supported(screen, fmt, PIPE_TEXTURE_2D, + (is_color) ? 
PIPE_TEXTURE_USAGE_RENDER_TARGET : + PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0); +} + +static boolean +dri2_display_convert_config(struct native_display *ndpy, + const __GLcontextModes *mode, + struct native_config *nconf) +{ + enum pipe_format formats[32]; + int num_formats, i; + + if (!(mode->renderType & GLX_RGBA_BIT) || !mode->rgbMode) + return FALSE; + + /* skip single-buffered configs */ + if (!mode->doubleBufferMode) + return FALSE; + + nconf->mode = *mode; + nconf->mode.renderType = GLX_RGBA_BIT; + nconf->mode.rgbMode = TRUE; + /* pbuffer is allocated locally and is always supported */ + nconf->mode.drawableType |= GLX_PBUFFER_BIT; + /* the swap method is always copy */ + nconf->mode.swapMethod = GLX_SWAP_COPY_OML; + + /* fix up */ + nconf->mode.rgbBits = + nconf->mode.redBits + nconf->mode.greenBits + + nconf->mode.blueBits + nconf->mode.alphaBits; + if (!(nconf->mode.drawableType & GLX_WINDOW_BIT)) { + nconf->mode.visualID = 0; + nconf->mode.visualType = GLX_NONE; + } + if (!(nconf->mode.drawableType & GLX_PBUFFER_BIT)) { + nconf->mode.bindToTextureRgb = FALSE; + nconf->mode.bindToTextureRgba = FALSE; + } + + nconf->color_format = PIPE_FORMAT_NONE; + nconf->depth_format = PIPE_FORMAT_NONE; + nconf->stencil_format = PIPE_FORMAT_NONE; + + /* choose color format */ + num_formats = choose_color_format(mode, formats); + for (i = 0; i < num_formats; i++) { + if (is_format_supported(ndpy->screen, formats[i], TRUE)) { + nconf->color_format = formats[i]; + break; + } + } + if (nconf->color_format == PIPE_FORMAT_NONE) + return FALSE; + + /* choose depth/stencil format */ + num_formats = choose_depth_stencil_format(mode, formats); + for (i = 0; i < num_formats; i++) { + if (is_format_supported(ndpy->screen, formats[i], FALSE)) { + nconf->depth_format = formats[i]; + nconf->stencil_format = formats[i]; + break; + } + } + if ((nconf->mode.depthBits && nconf->depth_format == PIPE_FORMAT_NONE) || + (nconf->mode.stencilBits && nconf->stencil_format == PIPE_FORMAT_NONE)) + 
return FALSE; + + return TRUE; +} + +static const struct native_config ** +dri2_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + const struct native_config **configs; + int i; + + /* first time */ + if (!dri2dpy->configs) { + const __GLcontextModes *modes; + int num_modes, count; + + modes = x11_screen_get_glx_configs(dri2dpy->xscr); + if (!modes) + return NULL; + num_modes = x11_context_modes_count(modes); + + dri2dpy->configs = calloc(num_modes, sizeof(*dri2dpy->configs)); + if (!dri2dpy->configs) + return NULL; + + count = 0; + for (i = 0; i < num_modes; i++) { + struct native_config *nconf = &dri2dpy->configs[count].base; + if (dri2_display_convert_config(&dri2dpy->base, modes, nconf)) + count++; + modes = modes->next; + } + + dri2dpy->num_configs = count; + } + + configs = malloc(dri2dpy->num_configs * sizeof(*configs)); + if (configs) { + for (i = 0; i < dri2dpy->num_configs; i++) + configs[i] = (const struct native_config *) &dri2dpy->configs[i]; + if (num_configs) + *num_configs = dri2dpy->num_configs; + } + + return configs; +} + +static boolean +dri2_display_is_pixmap_supported(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + uint depth, nconf_depth; + + depth = x11_drawable_get_depth(dri2dpy->xscr, (Drawable) pix); + nconf_depth = util_format_get_blocksizebits(nconf->color_format); + + /* simple depth match for now */ + return (depth == nconf_depth || (depth == 24 && depth + 8 == nconf_depth)); +} + +static void +dri2_display_destroy(struct native_display *ndpy) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + + if (dri2dpy->configs) + free(dri2dpy->configs); + + if (dri2dpy->base.screen) + dri2dpy->base.screen->destroy(dri2dpy->base.screen); + + if (dri2dpy->xscr) + x11_screen_destroy(dri2dpy->xscr); + if (dri2dpy->own_dpy) + XCloseDisplay(dri2dpy->dpy); + if 
(dri2dpy->api && dri2dpy->api->destroy) + dri2dpy->api->destroy(dri2dpy->api); + free(dri2dpy); +} + +/** + * Initialize DRI2 and pipe screen. + */ +static boolean +dri2_display_init_screen(struct native_display *ndpy) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + const char *driver = dri2dpy->api->name; + struct drm_create_screen_arg arg; + int fd; + + if (!x11_screen_support(dri2dpy->xscr, X11_SCREEN_EXTENSION_DRI2) || + !x11_screen_support(dri2dpy->xscr, X11_SCREEN_EXTENSION_GLX)) { + _eglLog(_EGL_WARNING, "GLX/DRI2 is not supported"); + return FALSE; + } + + fd = x11_screen_enable_dri2(dri2dpy->xscr, driver); + if (fd < 0) + return FALSE; + + memset(&arg, 0, sizeof(arg)); + arg.mode = DRM_CREATE_NORMAL; + dri2dpy->base.screen = dri2dpy->api->create_screen(dri2dpy->api, fd, &arg); + if (!dri2dpy->base.screen) { + _eglLog(_EGL_WARNING, "failed to create DRM screen"); + return FALSE; + } + + return TRUE; +} + +struct native_display * +x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api) +{ + struct dri2_display *dri2dpy; + + dri2dpy = CALLOC_STRUCT(dri2_display); + if (!dri2dpy) + return NULL; + + dri2dpy->api = api; + if (!dri2dpy->api) { + _eglLog(_EGL_WARNING, "failed to create DRM API"); + free(dri2dpy); + return NULL; + } + + dri2dpy->dpy = dpy; + if (!dri2dpy->dpy) { + dri2dpy->dpy = XOpenDisplay(NULL); + if (!dri2dpy->dpy) { + dri2_display_destroy(&dri2dpy->base); + return NULL; + } + dri2dpy->own_dpy = TRUE; + } + + dri2dpy->xscr_number = DefaultScreen(dri2dpy->dpy); + dri2dpy->xscr = x11_screen_create(dri2dpy->dpy, dri2dpy->xscr_number); + if (!dri2dpy->xscr) { + dri2_display_destroy(&dri2dpy->base); + return NULL; + } + + if (!dri2_display_init_screen(&dri2dpy->base)) { + dri2_display_destroy(&dri2dpy->base); + return NULL; + } + + dri2dpy->base.destroy = dri2_display_destroy; + dri2dpy->base.get_configs = dri2_display_get_configs; + dri2dpy->base.is_pixmap_supported = dri2_display_is_pixmap_supported; + 
dri2dpy->base.create_window_surface = dri2_display_create_window_surface; + dri2dpy->base.create_pixmap_surface = dri2_display_create_pixmap_surface; + dri2dpy->base.create_pbuffer_surface = dri2_display_create_pbuffer_surface; + + return &dri2dpy->base; +} diff --git a/src/gallium/state_trackers/egl/x11/native_x11.c b/src/gallium/state_trackers/egl/x11/native_x11.c new file mode 100644 index 00000000000..8eb542bd827 --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/native_x11.c @@ -0,0 +1,150 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <stdio.h> +#include <string.h> +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "state_tracker/drm_api.h" +#include "egllog.h" + +#include "native_x11.h" +#include "x11_screen.h" + +#define X11_PROBE_MAGIC 0x11980BE /* "X11PROBE" */ + +static struct drm_api *api; + +static void +x11_probe_destroy(struct native_probe *nprobe) +{ + if (nprobe->data) + free(nprobe->data); + free(nprobe); +} + +struct native_probe * +native_create_probe(EGLNativeDisplayType dpy) +{ + struct native_probe *nprobe; + struct x11_screen *xscr; + int scr; + const char *driver_name = NULL; + Display *xdpy; + + nprobe = CALLOC_STRUCT(native_probe); + if (!nprobe) + return NULL; + + xdpy = dpy; + if (!xdpy) { + xdpy = XOpenDisplay(NULL); + if (!xdpy) { + free(nprobe); + return NULL; + } + } + + scr = DefaultScreen(xdpy); + xscr = x11_screen_create(xdpy, scr); + if (xscr) { + if (x11_screen_support(xscr, X11_SCREEN_EXTENSION_DRI2)) { + driver_name = x11_screen_probe_dri2(xscr); + if (driver_name) + nprobe->data = strdup(driver_name); + } + + x11_screen_destroy(xscr); + } + + if (xdpy != dpy) + XCloseDisplay(xdpy); + + nprobe->magic = X11_PROBE_MAGIC; + nprobe->display = dpy; + + nprobe->destroy = x11_probe_destroy; + + return nprobe; +} + +enum native_probe_result +native_get_probe_result(struct native_probe *nprobe) +{ + if (!nprobe || nprobe->magic != X11_PROBE_MAGIC) + return NATIVE_PROBE_UNKNOWN; + + if (!api) + api = drm_api_create(); + + /* this is a software driver */ + if (!api) + return NATIVE_PROBE_SUPPORTED; + + /* the display does not support DRI2 or the driver mismatches */ + if (!nprobe->data || strcmp(api->name, (const char *) nprobe->data) != 0) + return NATIVE_PROBE_FALLBACK; + + return NATIVE_PROBE_EXACT; +} + +const char * +native_get_name(void) +{ + static char x11_name[32]; + + if (!api) + api = drm_api_create(); + + if (api) + snprintf(x11_name, sizeof(x11_name), "X11/%s", api->name); + else + snprintf(x11_name, sizeof(x11_name), "X11"); + + 
return x11_name; +} + +struct native_display * +native_create_display(EGLNativeDisplayType dpy) +{ + struct native_display *ndpy = NULL; + boolean force_sw; + + if (!api) + api = drm_api_create(); + + force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE); + if (api && !force_sw) { + ndpy = x11_create_dri2_display(dpy, api); + } + + if (!ndpy) { + EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING; + + _eglLog(level, "use software fallback"); + ndpy = x11_create_ximage_display(dpy, TRUE); + } + + return ndpy; +} diff --git a/src/gallium/state_trackers/egl/x11/native_x11.h b/src/gallium/state_trackers/egl/x11/native_x11.h new file mode 100644 index 00000000000..622ddac5df6 --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/native_x11.h @@ -0,0 +1,37 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _NATIVE_X11_H_ +#define _NATIVE_X11_H_ + +#include "state_tracker/drm_api.h" +#include "common/native.h" + +struct native_display * +x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm); + +struct native_display * +x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api); + +#endif /* _NATIVE_X11_H_ */ diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c new file mode 100644 index 00000000000..92a62f230e0 --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/native_ximage.c @@ -0,0 +1,684 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <sys/ipc.h> +#include <sys/types.h> +#include <sys/shm.h> +#include <X11/Xlib.h> +#include <X11/Xutil.h> +#include <X11/extensions/XShm.h> +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "pipe/p_compiler.h" +#include "util/u_simple_screen.h" +#include "util/u_inlines.h" +#include "softpipe/sp_winsys.h" +#include "egllog.h" + +#include "sw_winsys.h" +#include "native_x11.h" +#include "x11_screen.h" + +enum ximage_surface_type { + XIMAGE_SURFACE_TYPE_WINDOW, + XIMAGE_SURFACE_TYPE_PIXMAP, + XIMAGE_SURFACE_TYPE_PBUFFER +}; + +struct ximage_display { + struct native_display base; + Display *dpy; + boolean own_dpy; + + struct x11_screen *xscr; + int xscr_number; + + boolean use_xshm; + + struct pipe_winsys *winsys; + struct ximage_config *configs; + int num_configs; +}; + +struct ximage_buffer { + XImage *ximage; + + struct pipe_texture *texture; + XShmSegmentInfo *shm_info; + boolean xshm_attached; +}; + +struct ximage_surface { + struct native_surface base; + Drawable drawable; + enum ximage_surface_type type; + enum pipe_format color_format; + XVisualInfo visual; + struct ximage_display *xdpy; + + int width, height; + GC gc; + + struct ximage_buffer buffers[NUM_NATIVE_ATTACHMENTS]; + unsigned int sequence_number; +}; + +struct ximage_config { + struct native_config base; + const XVisualInfo *visual; +}; + +static INLINE struct ximage_display * +ximage_display(const struct native_display *ndpy) +{ + return (struct ximage_display *) ndpy; +} + +static INLINE struct ximage_surface * +ximage_surface(const struct native_surface *nsurf) +{ + return (struct ximage_surface *) nsurf; +} + +static INLINE struct ximage_config * +ximage_config(const struct native_config *nconf) +{ + return (struct ximage_config *) nconf; +} + +static void +ximage_surface_free_buffer(struct native_surface *nsurf, + enum native_attachment which) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + struct 
ximage_buffer *xbuf = &xsurf->buffers[which]; + + pipe_texture_reference(&xbuf->texture, NULL); + + if (xbuf->shm_info) { + if (xbuf->xshm_attached) + XShmDetach(xsurf->xdpy->dpy, xbuf->shm_info); + if (xbuf->shm_info->shmaddr != (void *) -1) + shmdt(xbuf->shm_info->shmaddr); + if (xbuf->shm_info->shmid != -1) + shmctl(xbuf->shm_info->shmid, IPC_RMID, 0); + + xbuf->shm_info->shmaddr = (void *) -1; + xbuf->shm_info->shmid = -1; + } +} + +static boolean +ximage_surface_alloc_buffer(struct native_surface *nsurf, + enum native_attachment which) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + struct ximage_buffer *xbuf = &xsurf->buffers[which]; + struct pipe_screen *screen = xsurf->xdpy->base.screen; + struct pipe_texture templ; + + /* free old data */ + if (xbuf->texture) + ximage_surface_free_buffer(&xsurf->base, which); + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.format = xsurf->color_format; + templ.width0 = xsurf->width; + templ.height0 = xsurf->height; + templ.depth0 = 1; + templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; + + if (xbuf->shm_info) { + struct pipe_buffer *pbuf; + unsigned stride, size; + void *addr = NULL; + + stride = util_format_get_stride(xsurf->color_format, xsurf->width); + /* alignment should depend on visual? 
*/ + stride = align(stride, 4); + size = stride * xsurf->height; + + /* create and attach shm object */ + xbuf->shm_info->shmid = shmget(IPC_PRIVATE, size, 0755); + if (xbuf->shm_info->shmid != -1) { + xbuf->shm_info->shmaddr = + shmat(xbuf->shm_info->shmid, NULL, 0); + if (xbuf->shm_info->shmaddr != (void *) -1) { + if (XShmAttach(xsurf->xdpy->dpy, xbuf->shm_info)) { + addr = xbuf->shm_info->shmaddr; + xbuf->xshm_attached = TRUE; + } + } + } + + if (addr) { + pbuf = screen->user_buffer_create(screen, addr, size); + if (pbuf) { + xbuf->texture = + screen->texture_blanket(screen, &templ, &stride, pbuf); + pipe_buffer_reference(&pbuf, NULL); + } + } + } + else { + xbuf->texture = screen->texture_create(screen, &templ); + } + + /* clean up the buffer if allocation failed */ + if (!xbuf->texture) + ximage_surface_free_buffer(&xsurf->base, which); + + return (xbuf->texture != NULL); +} + +static boolean +ximage_surface_draw_buffer(struct native_surface *nsurf, + enum native_attachment which) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + struct ximage_buffer *xbuf = &xsurf->buffers[which]; + struct pipe_screen *screen = xsurf->xdpy->base.screen; + struct pipe_transfer *transfer; + + if (xsurf->type == XIMAGE_SURFACE_TYPE_PBUFFER) + return TRUE; + + assert(xsurf->drawable && xbuf->ximage && xbuf->texture); + + transfer = screen->get_tex_transfer(screen, xbuf->texture, + 0, 0, 0, PIPE_TRANSFER_READ, 0, 0, xsurf->width, xsurf->height); + if (!transfer) + return FALSE; + + xbuf->ximage->bytes_per_line = transfer->stride; + xbuf->ximage->data = screen->transfer_map(screen, transfer); + if (!xbuf->ximage->data) { + screen->tex_transfer_destroy(transfer); + return FALSE; + } + + + if (xbuf->shm_info) + XShmPutImage(xsurf->xdpy->dpy, xsurf->drawable, xsurf->gc, + xbuf->ximage, 0, 0, 0, 0, xsurf->width, xsurf->height, False); + else + XPutImage(xsurf->xdpy->dpy, xsurf->drawable, xsurf->gc, + xbuf->ximage, 0, 0, 0, 0, xsurf->width, xsurf->height); + + 
xbuf->ximage->data = NULL; + screen->transfer_unmap(screen, transfer); + + /* + * softpipe allows the pipe transfer to be re-used, but we don't want to + * rely on that behavior. + */ + screen->tex_transfer_destroy(transfer); + + XSync(xsurf->xdpy->dpy, FALSE); + + return TRUE; +} + +static boolean +ximage_surface_flush_frontbuffer(struct native_surface *nsurf) +{ + return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); +} + +static boolean +ximage_surface_swap_buffers(struct native_surface *nsurf) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + struct ximage_buffer *xfront, *xback, xtmp; + + xfront = &xsurf->buffers[NATIVE_ATTACHMENT_FRONT_LEFT]; + xback = &xsurf->buffers[NATIVE_ATTACHMENT_BACK_LEFT]; + + /* draw the back buffer directly if there is no front buffer */ + if (!xfront->texture) + return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_BACK_LEFT); + + /* swap the buffers */ + xtmp = *xfront; + *xfront = *xback; + *xback = xtmp; + + /* the front/back textures are swapped */ + xsurf->sequence_number++; + + return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); +} + +static void +ximage_surface_update_geometry(struct native_surface *nsurf) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + Status ok; + Window root; + int x, y; + unsigned int w, h, border, depth; + + /* pbuffer has fixed geometry */ + if (xsurf->type == XIMAGE_SURFACE_TYPE_PBUFFER) + return; + + ok = XGetGeometry(xsurf->xdpy->dpy, xsurf->drawable, + &root, &x, &y, &w, &h, &border, &depth); + if (ok) { + xsurf->width = w; + xsurf->height = h; + } +} + +static boolean +ximage_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, + int *width, int *height) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + boolean new_buffers = FALSE; + int att; + + ximage_surface_update_geometry(&xsurf->base); + + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + 
struct ximage_buffer *xbuf = &xsurf->buffers[att]; + + /* delay the allocation */ + if (!native_attachment_mask_test(attachment_mask, att)) + continue; + + /* reallocate the texture */ + if (!xbuf->texture || + xsurf->width != xbuf->texture->width0 || + xsurf->height != xbuf->texture->height0) { + new_buffers = TRUE; + if (ximage_surface_alloc_buffer(&xsurf->base, att)) { + /* update ximage */ + if (xbuf->ximage) { + xbuf->ximage->width = xsurf->width; + xbuf->ximage->height = xsurf->height; + } + } + } + + if (textures) { + textures[att] = NULL; + pipe_texture_reference(&textures[att], xbuf->texture); + } + } + + /* increase the sequence number so that caller knows */ + if (new_buffers) + xsurf->sequence_number++; + + if (seq_num) + *seq_num = xsurf->sequence_number; + if (width) + *width = xsurf->width; + if (height) + *height = xsurf->height; + + return TRUE; +} + +static void +ximage_surface_wait(struct native_surface *nsurf) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + XSync(xsurf->xdpy->dpy, FALSE); + /* TODO XGetImage and update the front texture */ +} + +static void +ximage_surface_destroy(struct native_surface *nsurf) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + int i; + + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { + struct ximage_buffer *xbuf = &xsurf->buffers[i]; + ximage_surface_free_buffer(&xsurf->base, i); + /* xbuf->shm_info is owned by xbuf->ximage? 
*/ + if (xbuf->ximage) { + XDestroyImage(xbuf->ximage); + xbuf->ximage = NULL; + } + } + + if (xsurf->type != XIMAGE_SURFACE_TYPE_PBUFFER) + XFreeGC(xsurf->xdpy->dpy, xsurf->gc); + free(xsurf); +} + +static struct ximage_surface * +ximage_display_create_surface(struct native_display *ndpy, + enum ximage_surface_type type, + Drawable drawable, + const struct native_config *nconf) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + struct ximage_config *xconf = ximage_config(nconf); + struct ximage_surface *xsurf; + int i; + + xsurf = CALLOC_STRUCT(ximage_surface); + if (!xsurf) + return NULL; + + xsurf->xdpy = xdpy; + xsurf->type = type; + xsurf->color_format = xconf->base.color_format; + xsurf->drawable = drawable; + + if (xsurf->type != XIMAGE_SURFACE_TYPE_PBUFFER) { + xsurf->drawable = drawable; + xsurf->visual = *xconf->visual; + + xsurf->gc = XCreateGC(xdpy->dpy, xsurf->drawable, 0, NULL); + if (!xsurf->gc) { + free(xsurf); + return NULL; + } + + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { + struct ximage_buffer *xbuf = &xsurf->buffers[i]; + + if (xdpy->use_xshm) { + xbuf->shm_info = calloc(1, sizeof(*xbuf->shm_info)); + if (xbuf->shm_info) { + /* initialize shm info */ + xbuf->shm_info->shmid = -1; + xbuf->shm_info->shmaddr = (void *) -1; + xbuf->shm_info->readOnly = TRUE; + + xbuf->ximage = XShmCreateImage(xsurf->xdpy->dpy, + xsurf->visual.visual, + xsurf->visual.depth, + ZPixmap, NULL, + xbuf->shm_info, + 0, 0); + } + } + else { + xbuf->ximage = XCreateImage(xsurf->xdpy->dpy, + xsurf->visual.visual, + xsurf->visual.depth, + ZPixmap, 0, /* format, offset */ + NULL, /* data */ + 0, 0, /* size */ + 8, /* bitmap_pad */ + 0); /* bytes_per_line */ + } + + if (!xbuf->ximage) { + XFreeGC(xdpy->dpy, xsurf->gc); + free(xsurf); + return NULL; + } + } + } + + xsurf->base.destroy = ximage_surface_destroy; + xsurf->base.swap_buffers = ximage_surface_swap_buffers; + xsurf->base.flush_frontbuffer = ximage_surface_flush_frontbuffer; + xsurf->base.validate = 
ximage_surface_validate; + xsurf->base.wait = ximage_surface_wait; + + return xsurf; +} + +static struct native_surface * +ximage_display_create_window_surface(struct native_display *ndpy, + EGLNativeWindowType win, + const struct native_config *nconf) +{ + struct ximage_surface *xsurf; + + xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_WINDOW, + (Drawable) win, nconf); + return (xsurf) ? &xsurf->base : NULL; +} + +static struct native_surface * +ximage_display_create_pixmap_surface(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct ximage_surface *xsurf; + + xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_PIXMAP, + (Drawable) pix, nconf); + return (xsurf) ? &xsurf->base : NULL; +} + +static struct native_surface * +ximage_display_create_pbuffer_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct ximage_surface *xsurf; + + xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_PBUFFER, + (Drawable) None, nconf); + if (xsurf) { + xsurf->width = width; + xsurf->height = height; + } + return (xsurf) ? 
&xsurf->base : NULL; +} + +static enum pipe_format +choose_format(const XVisualInfo *vinfo) +{ + enum pipe_format fmt; + /* TODO elaborate the formats */ + switch (vinfo->depth) { + case 32: + fmt = PIPE_FORMAT_A8R8G8B8_UNORM; + break; + case 24: + fmt = PIPE_FORMAT_X8R8G8B8_UNORM; + break; + case 16: + fmt = PIPE_FORMAT_R5G6B5_UNORM; + break; + default: + fmt = PIPE_FORMAT_NONE; + break; + } + + return fmt; +} + +static const struct native_config ** +ximage_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + const struct native_config **configs; + int i; + + /* first time */ + if (!xdpy->configs) { + const XVisualInfo *visuals; + int num_visuals, count, j; + + visuals = x11_screen_get_visuals(xdpy->xscr, &num_visuals); + if (!visuals) + return NULL; + + /* + * Create two configs for each visual. + * One with depth/stencil buffer; one without + */ + xdpy->configs = calloc(num_visuals * 2, sizeof(*xdpy->configs)); + if (!xdpy->configs) + return NULL; + + count = 0; + for (i = 0; i < num_visuals; i++) { + for (j = 0; j < 2; j++) { + struct ximage_config *xconf = &xdpy->configs[count]; + __GLcontextModes *mode = &xconf->base.mode; + + xconf->visual = &visuals[i]; + xconf->base.color_format = choose_format(xconf->visual); + if (xconf->base.color_format == PIPE_FORMAT_NONE) + continue; + + x11_screen_convert_visual(xdpy->xscr, xconf->visual, mode); + /* support double buffer mode */ + mode->doubleBufferMode = TRUE; + + xconf->base.depth_format = PIPE_FORMAT_NONE; + xconf->base.stencil_format = PIPE_FORMAT_NONE; + /* create the second config with depth/stencil buffer */ + if (j == 1) { + xconf->base.depth_format = PIPE_FORMAT_S8Z24_UNORM; + xconf->base.stencil_format = PIPE_FORMAT_S8Z24_UNORM; + mode->depthBits = 24; + mode->stencilBits = 8; + mode->haveDepthBuffer = TRUE; + mode->haveStencilBuffer = TRUE; + } + + mode->maxPbufferWidth = 4096; + mode->maxPbufferHeight = 4096; + 
mode->maxPbufferPixels = 4096 * 4096; + mode->drawableType = + GLX_WINDOW_BIT | GLX_PIXMAP_BIT | GLX_PBUFFER_BIT; + mode->swapMethod = GLX_SWAP_EXCHANGE_OML; + + if (mode->alphaBits) + mode->bindToTextureRgba = TRUE; + else + mode->bindToTextureRgb = TRUE; + + count++; + } + } + + xdpy->num_configs = count; + } + + configs = malloc(xdpy->num_configs * sizeof(*configs)); + if (configs) { + for (i = 0; i < xdpy->num_configs; i++) + configs[i] = (const struct native_config *) &xdpy->configs[i]; + if (num_configs) + *num_configs = xdpy->num_configs; + } + return configs; +} + +static boolean +ximage_display_is_pixmap_supported(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + enum pipe_format fmt; + uint depth; + + depth = x11_drawable_get_depth(xdpy->xscr, (Drawable) pix); + switch (depth) { + case 32: + fmt = PIPE_FORMAT_A8R8G8B8_UNORM; + break; + case 24: + fmt = PIPE_FORMAT_X8R8G8B8_UNORM; + break; + case 16: + fmt = PIPE_FORMAT_R5G6B5_UNORM; + break; + default: + fmt = PIPE_FORMAT_NONE; + break; + } + + return (fmt == nconf->color_format); +} + +static void +ximage_display_destroy(struct native_display *ndpy) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + + if (xdpy->configs) + free(xdpy->configs); + + xdpy->base.screen->destroy(xdpy->base.screen); + free(xdpy->winsys); + + x11_screen_destroy(xdpy->xscr); + if (xdpy->own_dpy) + XCloseDisplay(xdpy->dpy); + free(xdpy); +} + +struct native_display * +x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm) +{ + struct ximage_display *xdpy; + + xdpy = CALLOC_STRUCT(ximage_display); + if (!xdpy) + return NULL; + + xdpy->dpy = dpy; + if (!xdpy->dpy) { + xdpy->dpy = XOpenDisplay(NULL); + if (!xdpy->dpy) { + free(xdpy); + return NULL; + } + xdpy->own_dpy = TRUE; + } + + xdpy->xscr_number = DefaultScreen(xdpy->dpy); + xdpy->xscr = x11_screen_create(xdpy->dpy, xdpy->xscr_number); + if (!xdpy->xscr) 
{ + free(xdpy); + return NULL; + } + + xdpy->use_xshm = + (use_xshm && x11_screen_support(xdpy->xscr, X11_SCREEN_EXTENSION_XSHM)); + + xdpy->winsys = create_sw_winsys(); + xdpy->base.screen = softpipe_create_screen(xdpy->winsys); + + xdpy->base.destroy = ximage_display_destroy; + + xdpy->base.get_configs = ximage_display_get_configs; + xdpy->base.is_pixmap_supported = ximage_display_is_pixmap_supported; + xdpy->base.create_window_surface = ximage_display_create_window_surface; + xdpy->base.create_pixmap_surface = ximage_display_create_pixmap_surface; + xdpy->base.create_pbuffer_surface = ximage_display_create_pbuffer_surface; + + return &xdpy->base; +} diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/state_trackers/egl/x11/sw_winsys.c index 6ee3ede38cb..33328aadf26 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/state_trackers/egl/x11/sw_winsys.c @@ -35,9 +35,9 @@ */ -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.h b/src/gallium/state_trackers/egl/x11/sw_winsys.h index f96c5a14b0a..f96c5a14b0a 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.h +++ b/src/gallium/state_trackers/egl/x11/sw_winsys.h diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.c b/src/gallium/state_trackers/egl/x11/x11_screen.c new file mode 100644 index 00000000000..d72bfc99d3e --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/x11_screen.c @@ -0,0 +1,453 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <X11/Xlibint.h> +#include <X11/extensions/XShm.h> +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "xf86drm.h" +#include "egllog.h" + +#include "x11_screen.h" +#include "dri2.h" +#include "glxinit.h" + +struct x11_screen { + Display *dpy; + int number; + + /* + * This is used to fetch GLX visuals/fbconfigs. It steals code from GLX. + * It might be better to rewrite the part in Xlib or XCB. + */ + __GLXdisplayPrivate *glx_dpy; + + int dri_major, dri_minor; + char *dri_driver; + char *dri_device; + int dri_fd; + + XVisualInfo *visuals; + int num_visuals; + + /* cached values for x11_drawable_get_depth */ + Drawable last_drawable; + unsigned int last_depth; +}; + + +/** + * Create a X11 screen. 
+ */ +struct x11_screen * +x11_screen_create(Display *dpy, int screen) +{ + struct x11_screen *xscr; + + if (screen >= ScreenCount(dpy)) + return NULL; + + xscr = CALLOC_STRUCT(x11_screen); + if (xscr) { + xscr->dpy = dpy; + xscr->number = screen; + + xscr->dri_major = -1; + xscr->dri_fd = -1; + } + return xscr; +} + +/** + * Destroy a X11 screen. + * + * Closes the DRM file descriptor if one was opened, and releases the cached + * DRI2 driver/device names and the cached visuals before freeing the screen. + */ +void +x11_screen_destroy(struct x11_screen *xscr) +{ + if (xscr->dri_fd >= 0) + close(xscr->dri_fd); + if (xscr->dri_driver) + Xfree(xscr->dri_driver); + if (xscr->dri_device) + Xfree(xscr->dri_device); + + /* xscr->glx_dpy will be destroyed with the X display */ + + if (xscr->visuals) + XFree(xscr->visuals); + /* xscr was allocated with CALLOC_STRUCT; release it with the matching FREE */ + FREE(xscr); +} + +static boolean +x11_screen_init_dri2(struct x11_screen *xscr) +{ + if (xscr->dri_major < 0) { + int eventBase, errorBase; + + if (!DRI2QueryExtension(xscr->dpy, &eventBase, &errorBase) || + !DRI2QueryVersion(xscr->dpy, &xscr->dri_major, &xscr->dri_minor)) + xscr->dri_major = -1; + } + return (xscr->dri_major >= 0); +} + +static boolean +x11_screen_init_glx(struct x11_screen *xscr) +{ + if (!xscr->glx_dpy) + xscr->glx_dpy = __glXInitialize(xscr->dpy); + return (xscr->glx_dpy != NULL); +} + +/** + * Return true if the screen supports the extension. + */ +boolean +x11_screen_support(struct x11_screen *xscr, enum x11_screen_extension ext) +{ + boolean supported = FALSE; + + switch (ext) { + case X11_SCREEN_EXTENSION_XSHM: + supported = XShmQueryExtension(xscr->dpy); + break; + case X11_SCREEN_EXTENSION_GLX: + supported = x11_screen_init_glx(xscr); + break; + case X11_SCREEN_EXTENSION_DRI2: + supported = x11_screen_init_dri2(xscr); + break; + default: + break; + } + + return supported; +} + +/** + * Return the X visuals. 
+ */ +const XVisualInfo * +x11_screen_get_visuals(struct x11_screen *xscr, int *num_visuals) +{ + /* query the server only once; later calls return the cached array */ + if (!xscr->visuals) { + XVisualInfo vinfo_template; + vinfo_template.screen = xscr->number; + xscr->visuals = XGetVisualInfo(xscr->dpy, VisualScreenMask, + &vinfo_template, &xscr->num_visuals); + } + + if (num_visuals) + *num_visuals = xscr->num_visuals; + return xscr->visuals; +} + +/** + * Fill a GL context mode from an X visual. + * + * The red/green/blue sizes are the bit counts of the visual's channel masks; + * alpha is whatever is left of the visual depth. The remaining fields touched + * here are set to fixed defaults. + */ +void +x11_screen_convert_visual(struct x11_screen *xscr, const XVisualInfo *visual, + __GLcontextModes *mode) +{ + int r, g, b, a; + int visual_type; + + r = util_bitcount(visual->red_mask); + g = util_bitcount(visual->green_mask); + b = util_bitcount(visual->blue_mask); + a = visual->depth - (r + g + b); +#if defined(__cplusplus) || defined(c_plusplus) + visual_type = visual->c_class; +#else + visual_type = visual->class; +#endif + + /* convert to GLX visual type */ + switch (visual_type) { + case TrueColor: + visual_type = GLX_TRUE_COLOR; + break; + case DirectColor: + visual_type = GLX_DIRECT_COLOR; + break; + case PseudoColor: + visual_type = GLX_PSEUDO_COLOR; + break; + case StaticColor: + visual_type = GLX_STATIC_COLOR; + break; + case GrayScale: + visual_type = GLX_GRAY_SCALE; + break; + case StaticGray: + visual_type = GLX_STATIC_GRAY; + break; + default: + visual_type = GLX_NONE; + break; + } + + mode->rgbBits = r + g + b + a; + mode->redBits = r; + mode->greenBits = g; + mode->blueBits = b; + mode->alphaBits = a; + mode->visualID = visual->visualid; + mode->visualType = visual_type; + + /* sane defaults */ + mode->renderType = GLX_RGBA_BIT; + mode->rgbMode = TRUE; + mode->visualRating = GLX_SLOW_CONFIG; + mode->xRenderable = TRUE; +} + +/** + * Return the GLX fbconfigs. + */ +const __GLcontextModes * +x11_screen_get_glx_configs(struct x11_screen *xscr) +{ + return (x11_screen_init_glx(xscr)) + ? xscr->glx_dpy->screenConfigs[xscr->number].configs + : NULL; +} + +/** + * Return the GLX visuals. 
+ */ +const __GLcontextModes * +x11_screen_get_glx_visuals(struct x11_screen *xscr) +{ + return (x11_screen_init_glx(xscr)) + ? xscr->glx_dpy->screenConfigs[xscr->number].visuals + : NULL; +} + +static boolean +x11_screen_is_driver_equal(struct x11_screen *xscr, const char *driver) +{ + return (strcmp(xscr->dri_driver, driver) == 0); +} + +/** + * Probe the screen for the DRI2 driver name. + */ +const char * +x11_screen_probe_dri2(struct x11_screen *xscr) +{ + /* get the driver name and the device name */ + if (!xscr->dri_driver) { + if (!DRI2Connect(xscr->dpy, RootWindow(xscr->dpy, xscr->number), + &xscr->dri_driver, &xscr->dri_device)) + xscr->dri_driver = xscr->dri_device = NULL; + } + + return xscr->dri_driver; +} + +/** + * Enable DRI2 and return the file descriptor of the DRM device. The file + * descriptor will be closed automatically when the screen is destroyed. + * + * Return -1 if the probe fails, the driver name does not match, or the + * device cannot be opened and authenticated. + */ +int +x11_screen_enable_dri2(struct x11_screen *xscr, const char *driver) +{ + if (xscr->dri_fd < 0) { + int fd; + drm_magic_t magic; + + /* get the driver name and the device name first */ + if (!x11_screen_probe_dri2(xscr)) + return -1; + + if (!x11_screen_is_driver_equal(xscr, driver)) { + _eglLog(_EGL_WARNING, "Driver mismatch: %s != %s", + xscr->dri_driver, driver); + return -1; + } + + fd = open(xscr->dri_device, O_RDWR); + if (fd < 0) { + _eglLog(_EGL_WARNING, "failed to open %s", xscr->dri_device); + return -1; + } + + memset(&magic, 0, sizeof(magic)); + if (drmGetMagic(fd, &magic)) { + _eglLog(_EGL_WARNING, "failed to get magic"); + close(fd); + return -1; + } + + if (!DRI2Authenticate(xscr->dpy, + RootWindow(xscr->dpy, xscr->number), magic)) { + _eglLog(_EGL_WARNING, "failed to authenticate magic"); + close(fd); + return -1; + } + + xscr->dri_fd = fd; + } + + return xscr->dri_fd; +} + +/** + * Create/Destroy the DRI drawable. 
+ */ +void +x11_drawable_enable_dri2(struct x11_screen *xscr, + Drawable drawable, boolean on) +{ + if (on) + DRI2CreateDrawable(xscr->dpy, drawable); + else + DRI2DestroyDrawable(xscr->dpy, drawable); +} + +/** + * Copy between buffers of the DRI2 drawable. + */ +void +x11_drawable_copy_buffers(struct x11_screen *xscr, Drawable drawable, + int x, int y, int width, int height, + int src_buf, int dst_buf) +{ + XRectangle rect; + XserverRegion region; + + rect.x = x; + rect.y = y; + rect.width = width; + rect.height = height; + + region = XFixesCreateRegion(xscr->dpy, &rect, 1); + DRI2CopyRegion(xscr->dpy, drawable, region, dst_buf, src_buf); + XFixesDestroyRegion(xscr->dpy, region); +} + +/** + * Get the buffers of the DRI2 drawable. The returned array should be freed. + */ +struct x11_drawable_buffer * +x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, + int *width, int *height, unsigned int *attachments, + boolean with_format, int num_ins, int *num_outs) +{ + DRI2Buffer *dri2bufs; + + if (with_format) + dri2bufs = DRI2GetBuffersWithFormat(xscr->dpy, drawable, width, height, + attachments, num_ins, num_outs); + else + dri2bufs = DRI2GetBuffers(xscr->dpy, drawable, width, height, + attachments, num_ins, num_outs); + + return (struct x11_drawable_buffer *) dri2bufs; +} + +/** + * Return the depth of a drawable, or 0 if its geometry cannot be queried. + * + * Unlike other drawable functions, the drawable need not be a DRI2 drawable. + * The result for the most recently queried drawable is cached in the screen. + */ +uint +x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable) +{ + unsigned int depth; + + if (drawable != xscr->last_drawable) { + Window root; + int x, y; + unsigned int w, h, border; + Status ok; + + ok = XGetGeometry(xscr->dpy, drawable, &root, + &x, &y, &w, &h, &border, &depth); + if (!ok) + depth = 0; + + xscr->last_drawable = drawable; + xscr->last_depth = depth; + } + else { + depth = xscr->last_depth; + } + + return depth; +} + +/** + * Create a mode list of the given size. 
+ */ +__GLcontextModes * +x11_context_modes_create(unsigned count) +{ + const size_t size = sizeof(__GLcontextModes); + __GLcontextModes *base = NULL; + __GLcontextModes **next; + unsigned i; + + next = &base; + for (i = 0; i < count; i++) { + *next = (__GLcontextModes *) calloc(1, size); + if (*next == NULL) { + /* all-or-nothing: drop the partial list on allocation failure */ + x11_context_modes_destroy(base); + base = NULL; + break; + } + next = &((*next)->next); + } + + return base; +} + +/** + * Destroy a mode list. + */ +void +x11_context_modes_destroy(__GLcontextModes *modes) +{ + while (modes != NULL) { + __GLcontextModes *next = modes->next; + free(modes); + modes = next; + } +} + +/** + * Return the number of modes in the mode list (0 for an empty list). + */ +unsigned +x11_context_modes_count(const __GLcontextModes *modes) +{ + const __GLcontextModes *mode; + /* unsigned to match the return type; avoids a signed-to-unsigned conversion */ + unsigned count = 0; + for (mode = modes; mode; mode = mode->next) + count++; + return count; +} diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.h b/src/gallium/state_trackers/egl/x11/x11_screen.h new file mode 100644 index 00000000000..5432858ac3e --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/x11_screen.h @@ -0,0 +1,105 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _X11_SCREEN_H_ +#define _X11_SCREEN_H_ + +#include <X11/Xlib.h> +#include <X11/Xutil.h> +#include <X11/extensions/dri2tokens.h> +#include "pipe/p_compiler.h" +#include "common/native.h" + +enum x11_screen_extension { + X11_SCREEN_EXTENSION_XSHM, + X11_SCREEN_EXTENSION_GLX, + X11_SCREEN_EXTENSION_DRI2, +}; + +/* the same as DRI2Buffer */ +struct x11_drawable_buffer { + unsigned int attachment; + unsigned int name; + unsigned int pitch; + unsigned int cpp; + unsigned int flags; +}; + +struct x11_screen; + +struct x11_screen * +x11_screen_create(Display *dpy, int screen); + +void +x11_screen_destroy(struct x11_screen *xscr); + +boolean +x11_screen_support(struct x11_screen *xscr, enum x11_screen_extension ext); + +const XVisualInfo * +x11_screen_get_visuals(struct x11_screen *xscr, int *num_visuals); + +void +x11_screen_convert_visual(struct x11_screen *xscr, const XVisualInfo *visual, + __GLcontextModes *mode); + +const __GLcontextModes * +x11_screen_get_glx_configs(struct x11_screen *xscr); + +const __GLcontextModes * +x11_screen_get_glx_visuals(struct x11_screen *xscr); + +const char * +x11_screen_probe_dri2(struct x11_screen *xscr); + +int +x11_screen_enable_dri2(struct x11_screen *xscr, const char *driver); + +__GLcontextModes * +x11_context_modes_create(unsigned count); + +void +x11_context_modes_destroy(__GLcontextModes *modes); + +unsigned +x11_context_modes_count(const __GLcontextModes *modes); + +void +x11_drawable_enable_dri2(struct x11_screen *xscr, + Drawable drawable, boolean on); 
+ +void +x11_drawable_copy_buffers(struct x11_screen *xscr, Drawable drawable, + int x, int y, int width, int height, + int src_buf, int dst_buf); + +struct x11_drawable_buffer * +x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, + int *width, int *height, unsigned int *attachments, + boolean with_format, int num_ins, int *num_outs); + +uint +x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable); + +#endif /* _X11_SCREEN_H_ */ diff --git a/src/gallium/state_trackers/es/Makefile b/src/gallium/state_trackers/es/Makefile new file mode 100644 index 00000000000..b0365512719 --- /dev/null +++ b/src/gallium/state_trackers/es/Makefile @@ -0,0 +1,84 @@ +# src/gallium/state_trackers/es/Makefile + +# Build the ES 1/2 state tracker libraries +# This consists of core Mesa ES, plus GL/gallium state tracker. + +TOP = ../../../.. +include $(TOP)/configs/current + +GLES_1_VERSION_MAJOR = 1 +GLES_1_VERSION_MINOR = 1 +GLES_1_VERSION_PATCH = 0 + +GLES_2_VERSION_MAJOR = 2 +GLES_2_VERSION_MINOR = 0 +GLES_2_VERSION_PATCH = 0 + + +# Maybe move these into configs/default: +GLES_1_LIB = GLESv1_CM +GLES_1_LIB_NAME = lib$(GLES_1_LIB).so +GLES_2_LIB = GLESv2 +GLES_2_LIB_NAME = lib$(GLES_2_LIB).so + + +ES1_OBJECTS = st_es1.o +ES2_OBJECTS = st_es2.o + + +ES1_LIBS = \ + $(TOP)/src/mesa/es/libes1gallium.a \ + $(TOP)/src/mesa/es/libes1api.a + +ES2_LIBS = \ + $(TOP)/src/mesa/es/libes2gallium.a \ + $(TOP)/src/mesa/es/libes2api.a + +SYS_LIBS = -lm -pthread + + +INCLUDE_DIRS = \ + -I$(TOP)/src/gallium/include + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(DEFINES) $(CFLAGS) $< -o $@ + + +# Default: make both GL ES 1.1 and GL ES 2.0 libraries +default: $(TOP)/$(LIB_DIR)/$(GLES_1_LIB_NAME) $(TOP)/$(LIB_DIR)/$(GLES_2_LIB_NAME) + +# Make the shared libs +$(TOP)/$(LIB_DIR)/$(GLES_1_LIB_NAME): $(ES1_OBJECTS) $(ES1_LIBS) $(GALLIUM_AUXILIARIES) + $(MKLIB) -o $(GLES_1_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + -major $(GLES_1_VERSION_MAJOR) \ + -minor $(GLES_1_VERSION_MINOR) \ + 
-patch $(GLES_1_VERSION_PATCH) \ + -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ + $(ES1_OBJECTS) \ + -Wl,--whole-archive $(ES1_LIBS) -Wl,--no-whole-archive \ + $(GALLIUM_AUXILIARIES) $(SYS_LIBS) + +# libGLESv2 links $(ES2_LIBS), so it must depend on them to relink when they change +$(TOP)/$(LIB_DIR)/$(GLES_2_LIB_NAME): $(ES2_OBJECTS) $(ES2_LIBS) $(GALLIUM_AUXILIARIES) + $(MKLIB) -o $(GLES_2_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + -major $(GLES_2_VERSION_MAJOR) \ + -minor $(GLES_2_VERSION_MINOR) \ + -patch $(GLES_2_VERSION_PATCH) \ + -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ + $(ES2_OBJECTS) \ + -Wl,--whole-archive $(ES2_LIBS) -Wl,--no-whole-archive \ + $(GALLIUM_AUXILIARIES) $(SYS_LIBS) + +install: default + $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/include/GLES + $(INSTALL) -m 644 $(TOP)/include/GLES/*.h $(DESTDIR)$(INSTALL_DIR)/include/GLES + $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/include/GLES2 + $(INSTALL) -m 644 $(TOP)/include/GLES2/*.h $(DESTDIR)$(INSTALL_DIR)/include/GLES2 + $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR) + $(MINSTALL) $(TOP)/$(LIB_DIR)/libGLESv1* $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR) + $(MINSTALL) $(TOP)/$(LIB_DIR)/libGLESv2* $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR) + +clean: + -rm -f *.o *~ + -rm -f $(TOP)/$(LIB_DIR)/$(GLES_1_LIB_NAME)* $(TOP)/$(LIB_DIR)/$(GLES_2_LIB_NAME)* + +depend: diff --git a/src/gallium/state_trackers/es/st_es1.c b/src/gallium/state_trackers/es/st_es1.c new file mode 100644 index 00000000000..25bc53b21eb --- /dev/null +++ b/src/gallium/state_trackers/es/st_es1.c @@ -0,0 +1,3 @@ +#include "pipe/p_compiler.h" + +PUBLIC const int st_api_OpenGL_ES1 = 1; diff --git a/src/gallium/state_trackers/es/st_es2.c b/src/gallium/state_trackers/es/st_es2.c new file mode 100644 index 00000000000..171ea62b97f --- /dev/null +++ b/src/gallium/state_trackers/es/st_es2.c @@ -0,0 +1,3 @@ +#include "pipe/p_compiler.h" + +PUBLIC const int st_api_OpenGL_ES2 = 1; diff --git a/src/gallium/state_trackers/glx/xlib/glx_getproc.c b/src/gallium/state_trackers/glx/xlib/glx_getproc.c index 84d47b12edc..bd4a85caa04 100644 --- 
a/src/gallium/state_trackers/glx/xlib/glx_getproc.c +++ b/src/gallium/state_trackers/glx/xlib/glx_getproc.c @@ -193,7 +193,7 @@ _glxapi_get_proc_address(const char *funcName) } -__GLXextFuncPtr +PUBLIC __GLXextFuncPtr glXGetProcAddressARB(const GLubyte *procName) { __GLXextFuncPtr f; diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index 1783bc504d9..fb314f3b528 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -760,7 +760,6 @@ PUBLIC XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) { static GLboolean firstTime = GL_TRUE; - struct pipe_context *_pipe = NULL; struct pipe_context *pipe = NULL; XMesaContext c; GLcontext *mesaCtx; @@ -788,11 +787,12 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (screen == NULL) goto fail; - _pipe = driver.create_pipe_context(_screen, (void *) c); - if (_pipe == NULL) + /* Trace screen knows how to properly wrap context creation in the + * wrapped screen, so nothing special to do here: + */ + pipe = screen->context_create(screen, (void *) c); + if (pipe == NULL) goto fail; - pipe = trace_context_create(screen, _pipe); - pipe->priv = c; c->st = st_create_context(pipe, &v->mesa_visual, diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.h b/src/gallium/state_trackers/glx/xlib/xm_api.h index d24971ca1c7..63a329cbe05 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.h +++ b/src/gallium/state_trackers/glx/xlib/xm_api.h @@ -60,7 +60,7 @@ and create a window, you must do the following to use the X/Mesa interface: #include "main/mtypes.h" #include "state_tracker/st_context.h" #include "state_tracker/st_public.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" # include <X11/Xlib.h> diff --git a/src/gallium/state_trackers/glx/xlib/xm_winsys.h b/src/gallium/state_trackers/glx/xlib/xm_winsys.h index 0e57605c34b..4bd5b5c8d3b 100644 --- 
a/src/gallium/state_trackers/glx/xlib/xm_winsys.h +++ b/src/gallium/state_trackers/glx/xlib/xm_winsys.h @@ -39,13 +39,6 @@ struct xm_driver { struct pipe_screen *(*create_pipe_screen)( void ); - /* The context_private argument needs to go away. Is currently used - * in a round-about way to associate a display-target surface with its - * Xlib window. - */ - struct pipe_context *(*create_pipe_context)( struct pipe_screen *, - void *context_private ); - void (*display_surface)( struct xmesa_buffer *, struct pipe_surface * ); diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript index 8498a90812e..527e065cd91 100644 --- a/src/gallium/state_trackers/python/SConscript +++ b/src/gallium/state_trackers/python/SConscript @@ -21,6 +21,7 @@ if 'python' in env['statetrackers']: 'gdi32', 'user32', 'kernel32', + 'ws2_32', ]) else: env.Append(LIBS = [ diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 96b13c2258e..99e177b0be4 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -40,9 +40,9 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "cso_cache/cso_context.h" +#include "util/u_inlines.h" #include "util/u_draw_quad.h" #include "util/u_tile.h" #include "util/u_math.h" @@ -76,7 +76,6 @@ %rename(BlendColor) pipe_blend_color; %rename(Blend) pipe_blend_state; %rename(Clip) pipe_clip_state; -%rename(ConstantBuffer) pipe_constant_buffer; %rename(Depth) pipe_depth_state; %rename(Stencil) pipe_stencil_state; %rename(Alpha) pipe_alpha_state; diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index 84ce1a41e6d..ce893dad453 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -142,10 +142,7 @@ struct st_context { void 
set_constant_buffer(unsigned shader, unsigned index, struct pipe_buffer *buffer ) { - struct pipe_constant_buffer state; - memset(&state, 0, sizeof(state)); - state.buffer = buffer; - $self->pipe->set_constant_buffer($self->pipe, shader, index, &state); + $self->pipe->set_constant_buffer($self->pipe, shader, index, buffer); } void set_framebuffer(const struct pipe_framebuffer_state *state ) diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py index bb61979d07c..190db43b082 100755 --- a/src/gallium/state_trackers/python/retrace/interpreter.py +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -94,7 +94,7 @@ struct_factories = { "pipe_blend_color": gallium.BlendColor, "pipe_blend_state": gallium.Blend, #"pipe_clip_state": gallium.Clip, - #"pipe_constant_buffer": gallium.ConstantBuffer, + #"pipe_buffer": gallium.Buffer, "pipe_depth_state": gallium.Depth, "pipe_stencil_state": gallium.Stencil, "pipe_alpha_state": gallium.Alpha, @@ -462,10 +462,10 @@ class Context(Object): sys.stdout.flush() def set_constant_buffer(self, shader, index, buffer): - if buffer is not None and buffer.buffer is not None: - self.real.set_constant_buffer(shader, index, buffer.buffer) + if buffer is not None: + self.real.set_constant_buffer(shader, index, buffer) - self.dump_constant_buffer(buffer.buffer) + self.dump_constant_buffer(buffer) def set_framebuffer_state(self, state): _state = gallium.Framebuffer() @@ -534,6 +534,8 @@ class Context(Object): gallium.PIPE_FORMAT_R32G32B32_FLOAT: '3f', gallium.PIPE_FORMAT_R32G32B32A32_FLOAT: '4f', gallium.PIPE_FORMAT_B8G8R8A8_UNORM: '4B', + gallium.PIPE_FORMAT_R8G8B8A8_UNORM: '4B', + gallium.PIPE_FORMAT_R16G16B16_SNORM: '3h', }[velem.src_format] data = vbuf.buffer.read() diff --git a/src/gallium/state_trackers/python/samples/gs.py b/src/gallium/state_trackers/python/samples/gs.py index a07cf557f2f..cd68abac9a1 100644 --- 
a/src/gallium/state_trackers/python/samples/gs.py +++ b/src/gallium/state_trackers/python/samples/gs.py @@ -72,11 +72,11 @@ def test(dev): # disabled blending/masking blend = Blend() - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.colormask = PIPE_MASK_RGBA + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].colormask = PIPE_MASK_RGBA ctx.set_blend(blend) # depth/stencil/alpha diff --git a/src/gallium/state_trackers/python/samples/tri.py b/src/gallium/state_trackers/python/samples/tri.py index e5e168bdc8d..f0b5e3dc984 100644 --- a/src/gallium/state_trackers/python/samples/tri.py +++ b/src/gallium/state_trackers/python/samples/tri.py @@ -72,11 +72,11 @@ def test(dev): # disabled blending/masking blend = Blend() - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.colormask = PIPE_MASK_RGBA + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].colormask = PIPE_MASK_RGBA ctx.set_blend(blend) # depth/stencil/alpha diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index d144af2447d..1146a8b0c38 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -29,7 +29,7 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include 
"cso_cache/cso_context.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -80,8 +80,7 @@ st_device_create_from_st_winsys(const struct st_winsys *st_ws) { struct st_device *st_dev; - if(!st_ws->screen_create || - !st_ws->context_create) + if(!st_ws->screen_create) return NULL; st_dev = CALLOC_STRUCT(st_device); @@ -158,13 +157,7 @@ st_context_create(struct st_device *st_dev) st_device_reference(&st_ctx->st_dev, st_dev); - st_ctx->real_pipe = st_dev->st_ws->context_create(st_dev->real_screen); - if(!st_ctx->real_pipe) { - st_context_destroy(st_ctx); - return NULL; - } - - st_ctx->pipe = trace_context_create(st_dev->screen, st_ctx->real_pipe); + st_ctx->pipe = st_dev->screen->create_context(st_dev->screen, NULL); if(!st_ctx->pipe) { st_context_destroy(st_ctx); return NULL; @@ -180,11 +173,11 @@ st_context_create(struct st_device *st_dev) { struct pipe_blend_state blend; memset(&blend, 0, sizeof(blend)); - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.colormask = PIPE_MASK_RGBA; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].colormask = PIPE_MASK_RGBA; cso_set_blend(st_ctx->cso, &blend); } diff --git a/src/gallium/state_trackers/python/st_device.h b/src/gallium/state_trackers/python/st_device.h index f786e134118..de9e0215d8e 100644 --- a/src/gallium/state_trackers/python/st_device.h +++ b/src/gallium/state_trackers/python/st_device.h @@ -50,7 +50,6 @@ struct st_surface struct st_context { struct st_device *st_dev; - struct pipe_context *real_pipe; struct pipe_context *pipe; struct cso_context *cso; diff --git a/src/gallium/state_trackers/python/st_hardpipe_winsys.c b/src/gallium/state_trackers/python/st_hardpipe_winsys.c index 
43aaaabf2a1..a3110a19d5d 100644 --- a/src/gallium/state_trackers/python/st_hardpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_hardpipe_winsys.c @@ -217,21 +217,6 @@ st_hardpipe_screen_create(void) } -static struct pipe_context * -st_hardpipe_context_create(struct pipe_screen *screen) -{ - if(st_hardpipe_load()) { - if(screen == pfnGetGalliumScreenMESA()) - return pfnCreateGalliumContextMESA(); - else - return NULL; - } - else - return st_softpipe_winsys.context_create(screen); -} - - const struct st_winsys st_hardpipe_winsys = { - &st_hardpipe_screen_create, - &st_hardpipe_context_create + &st_hardpipe_screen_create }; diff --git a/src/gallium/state_trackers/python/st_llvmpipe_winsys.c b/src/gallium/state_trackers/python/st_llvmpipe_winsys.c index 0096b18c994..5d83b5a9e15 100644 --- a/src/gallium/state_trackers/python/st_llvmpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_llvmpipe_winsys.c @@ -36,7 +36,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "llvmpipe/lp_winsys.h" @@ -135,14 +135,7 @@ no_winsys: } -static struct pipe_context * -st_llvmpipe_context_create(struct pipe_screen *screen) -{ - return llvmpipe_create(screen); -} - const struct st_winsys st_softpipe_winsys = { - &st_llvmpipe_screen_create, - &st_llvmpipe_context_create, + &st_llvmpipe_screen_create }; diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c index 96377414211..32a6551a87a 100644 --- a/src/gallium/state_trackers/python/st_sample.c +++ b/src/gallium/state_trackers/python/st_sample.c @@ -29,7 +29,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_tile.h" #include "util/u_math.h" diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c 
b/src/gallium/state_trackers/python/st_softpipe_winsys.c index a3294e877a6..81676bc3a4f 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -35,225 +35,9 @@ * @author Jose Fonseca */ - -#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */ -#include "pipe/p_format.h" -#include "pipe/p_context.h" -#include "pipe/p_inlines.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "st_winsys.h" - -struct st_softpipe_buffer -{ - struct pipe_buffer base; - boolean userBuffer; /** Is this a user-space buffer? */ - void *data; - void *mapped; -}; - - -/** Cast wrapper */ -static INLINE struct st_softpipe_buffer * -st_softpipe_buffer( struct pipe_buffer *buf ) -{ - return (struct st_softpipe_buffer *)buf; -} - - -static void * -st_softpipe_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags) -{ - struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); - st_softpipe_buf->mapped = st_softpipe_buf->data; - return st_softpipe_buf->mapped; -} - - -static void -st_softpipe_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); - st_softpipe_buf->mapped = NULL; -} - - -static void -st_softpipe_buffer_destroy(struct pipe_buffer *buf) -{ - struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf); - - if (oldBuf->data) { - if (!oldBuf->userBuffer) - align_free(oldBuf->data); - - oldBuf->data = NULL; - } - - FREE(oldBuf); -} - - -static void -st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, - struct pipe_surface *surf, - void *context_private) -{ -} - - - -static const char * -st_softpipe_get_name(struct pipe_winsys *winsys) -{ - return "softpipe"; -} - - -static struct pipe_buffer * -st_softpipe_buffer_create(struct pipe_winsys *winsys, - unsigned alignment, - unsigned usage, - 
unsigned size) -{ - struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer); - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.alignment = alignment; - buffer->base.usage = usage; - buffer->base.size = size; - - buffer->data = align_malloc(size, alignment); - - return &buffer->base; -} - - -/** - * Create buffer which wraps user-space data. - */ -static struct pipe_buffer * -st_softpipe_user_buffer_create(struct pipe_winsys *winsys, - void *ptr, - unsigned bytes) -{ - struct st_softpipe_buffer *buffer; - - buffer = CALLOC_STRUCT(st_softpipe_buffer); - if(!buffer) - return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.size = bytes; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; -} - - -static struct pipe_buffer * -st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride) -{ - const unsigned alignment = 64; - unsigned nblocksy; - - nblocksy = util_format_get_nblocksy(format, height); - *stride = align(util_format_get_stride(format, width), alignment); - - return winsys->buffer_create(winsys, alignment, - usage, - *stride * nblocksy); -} - - -static void -st_softpipe_fence_reference(struct pipe_winsys *winsys, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ -} - - -static int -st_softpipe_fence_signalled(struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static int -st_softpipe_fence_finish(struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static void -st_softpipe_destroy(struct pipe_winsys *winsys) -{ - FREE(winsys); -} - - -static struct pipe_screen * -st_softpipe_screen_create(void) -{ - static struct pipe_winsys *winsys; - struct pipe_screen *screen; - - winsys = CALLOC_STRUCT(pipe_winsys); - if(!winsys) - return NULL; - - 
winsys->destroy = st_softpipe_destroy; - - winsys->buffer_create = st_softpipe_buffer_create; - winsys->user_buffer_create = st_softpipe_user_buffer_create; - winsys->buffer_map = st_softpipe_buffer_map; - winsys->buffer_unmap = st_softpipe_buffer_unmap; - winsys->buffer_destroy = st_softpipe_buffer_destroy; - - winsys->surface_buffer_create = st_softpipe_surface_buffer_create; - - winsys->fence_reference = st_softpipe_fence_reference; - winsys->fence_signalled = st_softpipe_fence_signalled; - winsys->fence_finish = st_softpipe_fence_finish; - - winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer; - winsys->get_name = st_softpipe_get_name; - - screen = softpipe_create_screen(winsys); - if(!screen) - st_softpipe_destroy(winsys); - - return screen; -} - - -static struct pipe_context * -st_softpipe_context_create(struct pipe_screen *screen) -{ - return softpipe_create(screen); -} - - const struct st_winsys st_softpipe_winsys = { - &st_softpipe_screen_create, - &st_softpipe_context_create, + &softpipe_create_screen_malloc }; diff --git a/src/gallium/state_trackers/python/st_winsys.h b/src/gallium/state_trackers/python/st_winsys.h index b8cb612d863..0c7b6a200e1 100644 --- a/src/gallium/state_trackers/python/st_winsys.h +++ b/src/gallium/state_trackers/python/st_winsys.h @@ -38,9 +38,6 @@ struct st_winsys { struct pipe_screen * (*screen_create)(void); - - struct pipe_context * - (*context_create)(struct pipe_screen *screen); }; diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore b/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore new file mode 100644 index 00000000000..e33609d251c --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore @@ -0,0 +1 @@ +*.png diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-cb-1d.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-cb-1d.sh new file mode 100644 index 
00000000000..85fb9ea4e7f --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-cb-1d.sh @@ -0,0 +1,13 @@ +FRAG + +DCL IN[0], COLOR, LINEAR +DCL OUT[0], COLOR +DCL CONST[1] +DCL CONST[3] +DCL TEMP[0..1] + +ADD TEMP[0], IN[0], CONST[1] +RCP TEMP[1], CONST[3].xxxx +MUL OUT[0], TEMP[0], TEMP[1] + +END diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-cb-2d.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-cb-2d.sh new file mode 100644 index 00000000000..f70a5146f4e --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-cb-2d.sh @@ -0,0 +1,9 @@ +FRAG + +DCL IN[0], COLOR, LINEAR +DCL OUT[0], COLOR +DCL CONST[1][1..2] + +MAD OUT[0], IN[0], CONST[1][2], CONST[1][1] + +END diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-slt.sh b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-slt.sh index f2a1521cbf0..d58b7886a12 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-slt.sh +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/frag-slt.sh @@ -1,4 +1,4 @@ -FRAG1.1 +FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py index 8d3bf9d4d7e..41dd69d2542 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py @@ -26,6 +26,7 @@ # ########################################################################## +import struct from gallium import * @@ -50,11 +51,11 @@ def test(dev, name): # disabled blending/masking blend = Blend() - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO - 
blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.colormask = PIPE_MASK_RGBA + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].colormask = PIPE_MASK_RGBA ctx.set_blend(blend) # depth/stencil/alpha @@ -146,6 +147,42 @@ def test(dev, name): fs = Shader(file('frag-' + name + '.sh', 'rt').read()) ctx.set_fragment_shader(fs) + constbuf0 = dev.buffer_create(64, + (PIPE_BUFFER_USAGE_CONSTANT | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE), + 4 * 4 * 4) + + cbdata = '' + cbdata += struct.pack('4f', 0.4, 0.0, 0.0, 1.0) + cbdata += struct.pack('4f', 1.0, 1.0, 1.0, 1.0) + cbdata += struct.pack('4f', 2.0, 2.0, 2.0, 2.0) + cbdata += struct.pack('4f', 4.0, 8.0, 16.0, 32.0) + + constbuf0.write(cbdata, 0) + + ctx.set_constant_buffer(PIPE_SHADER_FRAGMENT, + 0, + constbuf0) + + constbuf1 = dev.buffer_create(64, + (PIPE_BUFFER_USAGE_CONSTANT | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE), + 4 * 4 * 4) + + cbdata = '' + cbdata += struct.pack('4f', 0.1, 0.1, 0.1, 0.1) + cbdata += struct.pack('4f', 0.25, 0.25, 0.25, 0.25) + cbdata += struct.pack('4f', 0.5, 0.5, 0.5, 0.5) + cbdata += struct.pack('4f', 0.75, 0.75, 0.75, 0.75) + + constbuf1.write(cbdata, 0) + + ctx.set_constant_buffer(PIPE_SHADER_FRAGMENT, + 1, + constbuf1) + xy = [ -0.8, -0.8, 0.8, -0.8, @@ -184,6 +221,8 @@ def main(): tests = [ 'abs', 'add', + 'cb-1d', + 'cb-2d', 'dp3', 'dp4', 'dst', diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore b/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore new file mode 100644 index 00000000000..e33609d251c --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore @@ -0,0 +1 @@ +*.png diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-cb-1d.sh 
b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-cb-1d.sh new file mode 100644 index 00000000000..b41fe5dd38f --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-cb-1d.sh @@ -0,0 +1,16 @@ +VERT + +DCL IN[0], POSITION +DCL IN[1], COLOR +DCL OUT[0], POSITION +DCL OUT[1], COLOR +DCL CONST[1] +DCL CONST[3] +DCL TEMP[0..1] + +MOV OUT[0], IN[0] +ADD TEMP[0], IN[1], CONST[1] +RCP TEMP[1], CONST[3].xxxx +MUL OUT[1], TEMP[0], TEMP[1] + +END diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-cb-2d.sh b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-cb-2d.sh new file mode 100644 index 00000000000..45f5e6b7299 --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vert-cb-2d.sh @@ -0,0 +1,12 @@ +VERT + +DCL IN[0], POSITION +DCL IN[1], COLOR +DCL OUT[0], POSITION +DCL OUT[1], COLOR +DCL CONST[1][1..2] + +MOV OUT[0], IN[0] +MAD OUT[1], IN[1], CONST[1][2], CONST[1][1] + +END diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py index 01bf5a3210d..2c44f872e1d 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py @@ -27,6 +27,8 @@ ########################################################################## +import struct + from gallium import * def make_image(surface): @@ -50,11 +52,11 @@ def test(dev, name): # disabled blending/masking blend = Blend() - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.colormask = PIPE_MASK_RGBA + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].rgb_dst_factor = 
PIPE_BLENDFACTOR_ZERO + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].colormask = PIPE_MASK_RGBA ctx.set_blend(blend) # depth/stencil/alpha @@ -143,6 +145,42 @@ def test(dev, name): ''') ctx.set_fragment_shader(fs) + constbuf0 = dev.buffer_create(64, + (PIPE_BUFFER_USAGE_CONSTANT | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE), + 4 * 4 * 4) + + cbdata = '' + cbdata += struct.pack('4f', 0.4, 0.0, 0.0, 1.0) + cbdata += struct.pack('4f', 1.0, 1.0, 1.0, 1.0) + cbdata += struct.pack('4f', 2.0, 2.0, 2.0, 2.0) + cbdata += struct.pack('4f', 4.0, 8.0, 16.0, 32.0) + + constbuf0.write(cbdata, 0) + + ctx.set_constant_buffer(PIPE_SHADER_VERTEX, + 0, + constbuf0) + + constbuf1 = dev.buffer_create(64, + (PIPE_BUFFER_USAGE_CONSTANT | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE), + 4 * 4 * 4) + + cbdata = '' + cbdata += struct.pack('4f', 0.1, 0.1, 0.1, 0.1) + cbdata += struct.pack('4f', 0.25, 0.25, 0.25, 0.25) + cbdata += struct.pack('4f', 0.5, 0.5, 0.5, 0.5) + cbdata += struct.pack('4f', 0.75, 0.75, 0.75, 0.75) + + constbuf1.write(cbdata, 0) + + ctx.set_constant_buffer(PIPE_SHADER_VERTEX, + 1, + constbuf1) + xy = [ 0.0, 0.8, -0.2, 0.4, @@ -213,6 +251,8 @@ def main(): 'add', 'arl', 'arr', + 'cb-1d', + 'cb-2d', 'dp3', 'dp4', 'dst', diff --git a/src/gallium/state_trackers/python/tests/texture_render.py b/src/gallium/state_trackers/python/tests/texture_render.py index 79287f2cace..0fac1ea5ef3 100755 --- a/src/gallium/state_trackers/python/tests/texture_render.py +++ b/src/gallium/state_trackers/python/tests/texture_render.py @@ -115,11 +115,11 @@ class TextureTest(TestCase): # disabled blending/masking blend = Blend() - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.colormask = PIPE_MASK_RGBA + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].alpha_src_factor = 
PIPE_BLENDFACTOR_ONE + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].colormask = PIPE_MASK_RGBA ctx.set_blend(blend) # no-op depth/stencil/alpha diff --git a/src/gallium/state_trackers/python/tests/texture_sample.py b/src/gallium/state_trackers/python/tests/texture_sample.py index 520961c8051..db32b537a17 100755 --- a/src/gallium/state_trackers/python/tests/texture_sample.py +++ b/src/gallium/state_trackers/python/tests/texture_sample.py @@ -140,11 +140,11 @@ class TextureColorSampleTest(TestCase): # disabled blending/masking blend = Blend() - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.colormask = PIPE_MASK_RGBA + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].colormask = PIPE_MASK_RGBA ctx.set_blend(blend) # no-op depth/stencil/alpha @@ -327,11 +327,11 @@ class TextureDepthSampleTest(TestCase): # disabled blending/masking blend = Blend() - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO - blend.colormask = PIPE_MASK_RGBA + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.rt[0].colormask = PIPE_MASK_RGBA ctx.set_blend(blend) # depth/stencil/alpha diff --git a/src/gallium/state_trackers/vega/Makefile b/src/gallium/state_trackers/vega/Makefile index fc97bf51f8f..037d8dc911a 100644 --- a/src/gallium/state_trackers/vega/Makefile +++ 
b/src/gallium/state_trackers/vega/Makefile @@ -1,8 +1,14 @@ -# src/mesa/Makefile +# src/gallium/state_trackers/vega/Makefile TOP = ../../../.. include $(TOP)/configs/current -GALLIUM = $(TOP) + +VG_LIB = OpenVG +VG_LIB_NAME = lib$(VG_LIB).so + +VG_MAJOR = 1 +VG_MINOR = 0 +VG_TINY = 0 ### Lists of source files, included by Makefiles @@ -34,88 +40,54 @@ VG_SOURCES = \ shader.c \ shaders_cache.c +VG_OBJECTS = $(VG_SOURCES:.c=.o) -### All the core C sources - -ALL_SOURCES = \ - $(VG_SOURCES) - - -### Object files -VG_OBJECTS = \ - $(VG_SOURCES:.c=.o) +VG_LIBS = $(GALLIUM_AUXILIARIES) -lm ### Include directories INCLUDE_DIRS = \ -I$(TOP)/include \ - -I$(GALLIUM)/include \ - -I$(GALLIUM)/src/gallium/include \ - -I$(GALLIUM)/src/gallium/auxiliary + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary -VG_LIB = OpenVG -VG_LIB_NAME = lib$(VG_LIB).so - -VG_MAJOR = 1 -VG_MINOR = 0 -VG_TINY = 0 - -GALLIUM_LIBS = \ - $(GALLIUM)/src/gallium/auxiliary/libgallium.a - -.SUFFIXES : .cpp .c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + $(CC) -c $(INCLUDE_DIRS) $(DEFINES) $(CFLAGS) $< -o $@ -.cpp.o: - $(CXX) -c $(INCLUDE_DIRS) $(CXXFLAGS) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ - - -default: depend subdirs $(TOP)/$(LIB_DIR)/$(VG_LIB_NAME) +default: depend $(TOP)/$(LIB_DIR)/$(VG_LIB_NAME) # Make the OpenVG library -$(TOP)/$(LIB_DIR)/$(VG_LIB_NAME): $(VG_OBJECTS) $(GALLIUM_LIBS) - $(TOP)/bin/mklib -o $(VG_LIB) \ +$(TOP)/$(LIB_DIR)/$(VG_LIB_NAME): $(VG_OBJECTS) $(VG_LIBS) + $(MKLIB) -o $(VG_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -major $(VG_MAJOR) \ -minor $(VG_MINOR) \ -patch $(VG_TINY) \ -install $(TOP)/$(LIB_DIR) \ - $(VG_OBJECTS) $(GALLIUM_LIBS) \ - -Wl,--whole-archive $(LIBS) -Wl,--no-whole-archive $(SYS_LIBS) + $(VG_OBJECTS) $(VG_LIBS) ###################################################################### # Generic stuff -depend: $(ALL_SOURCES) +depend: $(VG_SOURCES) @ echo "running $(MKDEP)" @ rm -f depend # workaround oops on gutsy?!? 
@ touch depend - @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ + @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(VG_SOURCES) \ > /dev/null 2>/dev/null - -subdirs: - install: default - $(INSTALL) -d $(INSTALL_DIR)/include/VG - $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR) - $(INSTALL) -m 644 $(TOP)/include/VG/*.h $(INSTALL_DIR)/include/VG - @if [ -e $(TOP)/$(LIB_DIR)/$(VG_LIB_NAME) ]; then \ - $(INSTALL) $(TOP)/$(LIB_DIR)/libOpenVG* $(INSTALL_DIR)/$(LIB_DIR); \ - fi + $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/include/VG + $(INSTALL) -m 644 $(TOP)/include/VG/*.h $(DESTDIR)$(INSTALL_DIR)/include/VG + $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR) + $(MINSTALL) $(TOP)/$(LIB_DIR)/libOpenVG* $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR) # Emacs tags tags: etags `find . -name \*.[ch]` $(TOP)/include/VG/*.h clean: - -rm -f *.o - -rm -f */*.o - -rm -f */*/*.o - -rm -f depend depend.bak + rm -f $(VG_OBJECTS) + rm -f depend depend.bak -include depend +sinclude depend diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c index 2f984fb7b9a..20c72c1ff54 100644 --- a/src/gallium/state_trackers/vega/api_filters.c +++ b/src/gallium/state_trackers/vega/api_filters.c @@ -34,7 +34,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_screen.h" #include "pipe/p_shader_tokens.h" @@ -127,19 +127,19 @@ static void setup_blend() struct vg_context *ctx = vg_current_context(); struct pipe_blend_state blend; memset(&blend, 0, sizeof(blend)); - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = 
PIPE_BLENDFACTOR_ZERO; if (ctx->state.vg.filter_channel_mask & VG_RED) - blend.colormask |= PIPE_MASK_R; + blend.rt[0].colormask |= PIPE_MASK_R; if (ctx->state.vg.filter_channel_mask & VG_GREEN) - blend.colormask |= PIPE_MASK_G; + blend.rt[0].colormask |= PIPE_MASK_G; if (ctx->state.vg.filter_channel_mask & VG_BLUE) - blend.colormask |= PIPE_MASK_B; + blend.rt[0].colormask |= PIPE_MASK_B; if (ctx->state.vg.filter_channel_mask & VG_ALPHA) - blend.colormask |= PIPE_MASK_A; - blend.blend_enable = 1; + blend.rt[0].colormask |= PIPE_MASK_A; + blend.rt[0].blend_enable = 0; cso_set_blend(ctx->cso_context, &blend); } @@ -147,22 +147,22 @@ static void setup_constant_buffer(struct vg_context *ctx, const void *buffer, VGint param_bytes) { struct pipe_context *pipe = ctx->pipe; - struct pipe_constant_buffer *cbuf = &ctx->filter.buffer; + struct pipe_buffer **cbuf = &ctx->filter.buffer; /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. 
*/ - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); - cbuf->buffer = pipe_buffer_create(pipe->screen, 16, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); + *cbuf = pipe_buffer_create(pipe->screen, 16, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, buffer); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); } static void setup_samplers(struct vg_context *ctx, struct filter_info *info) diff --git a/src/gallium/state_trackers/vega/api_images.c b/src/gallium/state_trackers/vega/api_images.c index c437553bc23..015241498ed 100644 --- a/src/gallium/state_trackers/vega/api_images.c +++ b/src/gallium/state_trackers/vega/api_images.c @@ -35,7 +35,7 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_blit.h" #include "util/u_tile.h" #include "util/u_memory.h" diff --git a/src/gallium/state_trackers/vega/api_masks.c b/src/gallium/state_trackers/vega/api_masks.c index 4f9f3dae173..9c123a4cf95 100644 --- a/src/gallium/state_trackers/vega/api_masks.c +++ b/src/gallium/state_trackers/vega/api_masks.c @@ -31,8 +31,7 @@ #include "vg_context.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" /* for winsys->update_buffer */ +#include "util/u_inlines.h" #include "util/u_pack_color.h" #include "util/u_draw_quad.h" @@ -116,8 +115,8 @@ clear_with_quad(struct vg_context *st, float x0, float y0, x1, y1); */ - if (st->pipe->winsys && st->pipe->winsys->update_buffer) - st->pipe->winsys->update_buffer( st->pipe->winsys, + if (st->pipe->screen && st->pipe->screen->update_buffer) + st->pipe->screen->update_buffer( st->pipe->screen, st->pipe->priv ); cso_save_blend(st->cso_context); @@ 
-129,14 +128,11 @@ clear_with_quad(struct vg_context *st, float x0, float y0, { struct pipe_blend_state blend; memset(&blend, 0, sizeof(blend)); - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.colormask |= PIPE_MASK_R; - blend.colormask |= PIPE_MASK_G; - blend.colormask |= PIPE_MASK_B; - blend.colormask |= PIPE_MASK_A; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].colormask = PIPE_MASK_RGBA; cso_set_blend(st->cso_context, &blend); } diff --git a/src/gallium/state_trackers/vega/api_path.c b/src/gallium/state_trackers/vega/api_path.c index 15ac1900f4b..58ebb3b60ea 100644 --- a/src/gallium/state_trackers/vega/api_path.c +++ b/src/gallium/state_trackers/vega/api_path.c @@ -32,7 +32,7 @@ #include "paint.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_draw_quad.h" VGPath vgCreatePath(VGint pathFormat, diff --git a/src/gallium/state_trackers/vega/asm_fill.h b/src/gallium/state_trackers/vega/asm_fill.h index 2f394ad6c55..27773467fa8 100644 --- a/src/gallium/state_trackers/vega/asm_fill.h +++ b/src/gallium/state_trackers/vega/asm_fill.h @@ -27,166 +27,375 @@ #ifndef ASM_FILL_H #define ASM_FILL_H -static const char solid_fill_asm[] = - "MOV %s, CONST[0]\n"; - - -static const char linear_grad_asm[] = - "MOV TEMP[0].xy, IN[0]\n" - "MOV TEMP[0].z, CONST[1].yyyy\n" - "DP3 TEMP[1], CONST[2], TEMP[0]\n" - "DP3 TEMP[2], CONST[3], TEMP[0]\n" - "DP3 TEMP[3], CONST[4], TEMP[0]\n" - "RCP TEMP[3], TEMP[3]\n" - "MUL TEMP[1], TEMP[1], TEMP[3]\n" - "MUL TEMP[2], TEMP[2], TEMP[3]\n" - "MOV TEMP[4].x, TEMP[1]\n" - "MOV TEMP[4].y, TEMP[2]\n" - "MUL TEMP[0], CONST[0].yyyy, TEMP[4].yyyy\n" - "MAD 
TEMP[1], CONST[0].xxxx, TEMP[4].xxxx, TEMP[0]\n" - "MUL TEMP[2], TEMP[1], CONST[0].zzzz\n" - "TEX %s, TEMP[2], SAMP[0], 1D\n"; - -static const char radial_grad_asm[] = - "MOV TEMP[0].xy, IN[0]\n" - "MOV TEMP[0].z, CONST[1].yyyy\n" - "DP3 TEMP[1], CONST[2], TEMP[0]\n" - "DP3 TEMP[2], CONST[3], TEMP[0]\n" - "DP3 TEMP[3], CONST[4], TEMP[0]\n" - "RCP TEMP[3], TEMP[3]\n" - "MUL TEMP[1], TEMP[1], TEMP[3]\n" - "MUL TEMP[2], TEMP[2], TEMP[3]\n" - "MOV TEMP[5].x, TEMP[1]\n" - "MOV TEMP[5].y, TEMP[2]\n" - "MUL TEMP[0], CONST[0].yyyy, TEMP[5].yyyy\n" - "MAD TEMP[1], CONST[0].xxxx, TEMP[5].xxxx, TEMP[0]\n" - "ADD TEMP[1], TEMP[1], TEMP[1]\n" - "MUL TEMP[3], TEMP[5].yyyy, TEMP[5].yyyy\n" - "MAD TEMP[4], TEMP[5].xxxx, TEMP[5].xxxx, TEMP[3]\n" - "MOV TEMP[4], -TEMP[4]\n" - "MUL TEMP[2], CONST[0].zzzz, TEMP[4]\n" - "MUL TEMP[0], CONST[1].wwww, TEMP[2]\n" - "MUL TEMP[3], TEMP[1], TEMP[1]\n" - "SUB TEMP[2], TEMP[3], TEMP[0]\n" - "RSQ TEMP[2], |TEMP[2]|\n" - "RCP TEMP[2], TEMP[2]\n" - "SUB TEMP[1], TEMP[2], TEMP[1]\n" - "ADD TEMP[0], CONST[0].zzzz, CONST[0].zzzz\n" - "RCP TEMP[0], TEMP[0]\n" - "MUL TEMP[2], TEMP[1], TEMP[0]\n" - "TEX %s, TEMP[2], SAMP[0], 1D\n"; - -static const char pattern_asm[] = - "MOV TEMP[0].xy, IN[0]\n" - "MOV TEMP[0].z, CONST[1].yyyy\n" - "DP3 TEMP[1], CONST[2], TEMP[0]\n" - "DP3 TEMP[2], CONST[3], TEMP[0]\n" - "DP3 TEMP[3], CONST[4], TEMP[0]\n" - "RCP TEMP[3], TEMP[3]\n" - "MUL TEMP[1], TEMP[1], TEMP[3]\n" - "MUL TEMP[2], TEMP[2], TEMP[3]\n" - "MOV TEMP[4].x, TEMP[1]\n" - "MOV TEMP[4].y, TEMP[2]\n" - "RCP TEMP[0], CONST[1].zwzw\n" - "MOV TEMP[1], TEMP[4]\n" - "MUL TEMP[1].x, TEMP[1], TEMP[0]\n" - "MUL TEMP[1].y, TEMP[1], TEMP[0]\n" - "TEX %s, TEMP[1], SAMP[0], 2D\n"; - - -static const char mask_asm[] = - "TEX TEMP[1], IN[0], SAMP[1], 2D\n" - "MUL TEMP[0].w, TEMP[0].wwww, TEMP[1].wwww\n" - "MOV %s, TEMP[0]\n"; - - -static const char image_normal_asm[] = - "TEX %s, IN[1], SAMP[3], 2D\n"; - -static const char image_multiply_asm[] = - "TEX TEMP[1], IN[1], 
SAMP[3], 2D\n" - "MUL %s, TEMP[0], TEMP[1]\n"; - -static const char image_stencil_asm[] = - "TEX TEMP[1], IN[1], SAMP[3], 2D\n" - "MUL %s, TEMP[0], TEMP[1]\n"; - - -#define EXTENDED_BLEND_OVER \ - "SUB TEMP[3], CONST[1].yyyy, TEMP[1].wwww\n" \ - "SUB TEMP[4], CONST[1].yyyy, TEMP[0].wwww\n" \ - "MUL TEMP[3], TEMP[0], TEMP[3]\n" \ - "MUL TEMP[4], TEMP[1], TEMP[4]\n" \ - "ADD TEMP[3], TEMP[3], TEMP[4]\n" - -static const char blend_multiply_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - EXTENDED_BLEND_OVER - "MUL TEMP[4], TEMP[0], TEMP[1]\n" - "ADD TEMP[1], TEMP[4], TEMP[3]\n"/*result.rgb*/ - "MUL TEMP[2], TEMP[0].wwww, TEMP[1].wwww\n" - "ADD TEMP[3], TEMP[0].wwww, TEMP[1].wwww\n" - "SUB TEMP[1].w, TEMP[3], TEMP[2]\n" - "MOV %s, TEMP[1]\n"; -#if 1 -static const char blend_screen_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - "ADD TEMP[3], TEMP[0], TEMP[1]\n" - "MUL TEMP[2], TEMP[0], TEMP[1]\n" - "SUB %s, TEMP[3], TEMP[2]\n"; -#else -static const char blend_screen_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - "MOV %s, TEMP[1]\n"; -#endif - -static const char blend_darken_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - EXTENDED_BLEND_OVER - "MUL TEMP[4], TEMP[0], TEMP[1].wwww\n" - "MUL TEMP[5], TEMP[1], TEMP[0].wwww\n" - "MIN TEMP[4], TEMP[4], TEMP[5]\n" - "ADD TEMP[1], TEMP[3], TEMP[4]\n" - "MUL TEMP[2], TEMP[0].wwww, TEMP[1].wwww\n" - "ADD TEMP[3], TEMP[0].wwww, TEMP[1].wwww\n" - "SUB TEMP[1].w, TEMP[3], TEMP[2]\n" - "MOV %s, TEMP[1]\n"; - -static const char blend_lighten_asm[] = - "TEX TEMP[1], IN[0], SAMP[2], 2D\n" - EXTENDED_BLEND_OVER - "MUL TEMP[4], TEMP[0], TEMP[1].wwww\n" - "MUL TEMP[5], TEMP[1], TEMP[0].wwww\n" - "MAX TEMP[4], TEMP[4], TEMP[5]\n" - "ADD TEMP[1], TEMP[3], TEMP[4]\n" - "MUL TEMP[2], TEMP[0].wwww, TEMP[1].wwww\n" - "ADD TEMP[3], TEMP[0].wwww, TEMP[1].wwww\n" - "SUB TEMP[1].w, TEMP[3], TEMP[2]\n" - "MOV %s, TEMP[1]\n"; - - -static const char premultiply_asm[] = - "MUL TEMP[0].xyz, TEMP[0], TEMP[0].wwww\n"; - -static const char 
unpremultiply_asm[] = - "TEX TEMP[0], IN[0], SAMP[1], 2D\n"; - - -static const char color_bw_asm[] = - "ADD TEMP[1], CONST[1].yyyy, CONST[1].yyyy\n" - "RCP TEMP[2], TEMP[1]\n" - "ADD TEMP[1], CONST[1].yyyy, TEMP[2]\n" - "ADD TEMP[2].x, TEMP[0].xxxx, TEMP[0].yyyy\n" - "ADD TEMP[2].x, TEMP[0].zzzz, TEMP[0].xxxx\n" - "SGE TEMP[0].xyz, TEMP[2].xxxx, TEMP[1]\n" - "SGE TEMP[0].w, TEMP[0].wwww, TEMP[2].yyyy\n" - "MOV %s, TEMP[0]\n"; +#include "tgsi/tgsi_ureg.h" + +typedef void (* ureg_func)( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant); + +static INLINE void +solid_fill( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_MOV(ureg, *out, constant[0]); +} + +static INLINE void +linear_grad( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_XY), + in[0]); + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_Z), + ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); + ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0])); + ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0])); + ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0])); + ureg_RCP(ureg, temp[3], ureg_src(temp[3])); + ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); + ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); + ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_X), ureg_src(temp[1])); + ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_Y), ureg_src(temp[2])); + ureg_MUL(ureg, temp[0], + ureg_scalar(constant[0], TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_Y)); + ureg_MAD(ureg, temp[1], + ureg_scalar(constant[0], TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp[4]), 
TGSI_SWIZZLE_X), + ureg_src(temp[0])); + ureg_MUL(ureg, temp[2], ureg_src(temp[1]), + ureg_scalar(constant[0], TGSI_SWIZZLE_Z)); + ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]); +} + +static INLINE void +radial_grad( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + + ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_XY), in[0]); + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_Z), + ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); + ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0])); + ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0])); + ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0])); + ureg_RCP(ureg, temp[3], ureg_src(temp[3])); + ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); + ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); + ureg_MOV(ureg, ureg_writemask(temp[5], TGSI_WRITEMASK_X), ureg_src(temp[1])); + ureg_MOV(ureg, ureg_writemask(temp[5], TGSI_WRITEMASK_Y), ureg_src(temp[2])); + ureg_MUL(ureg, temp[0], ureg_scalar(constant[0], TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y)); + ureg_MAD(ureg, temp[1], + ureg_scalar(constant[0], TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), ureg_src(temp[0])); + ureg_ADD(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[1])); + ureg_MUL(ureg, temp[3], + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_Y)); + ureg_MAD(ureg, temp[4], + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp[5]), TGSI_SWIZZLE_X), + ureg_src(temp[3])); + ureg_MOV(ureg, temp[4], ureg_negate(ureg_src(temp[4]))); + ureg_MUL(ureg, temp[2], + ureg_scalar(constant[0], TGSI_SWIZZLE_Z), + ureg_src(temp[4])); + ureg_MUL(ureg, temp[0], + ureg_scalar(constant[1], TGSI_SWIZZLE_W), + ureg_src(temp[2])); + ureg_MUL(ureg, temp[3], ureg_src(temp[1]), ureg_src(temp[1])); + 
+ ureg_SUB(ureg, temp[2], ureg_src(temp[3]), ureg_src(temp[0])); + ureg_RSQ(ureg, temp[2], ureg_abs(ureg_src(temp[2]))); + ureg_RCP(ureg, temp[2], ureg_src(temp[2])); + ureg_SUB(ureg, temp[1], ureg_src(temp[2]), ureg_src(temp[1])); + ureg_ADD(ureg, temp[0], + ureg_scalar(constant[0], TGSI_SWIZZLE_Z), + ureg_scalar(constant[0], TGSI_SWIZZLE_Z)); + ureg_RCP(ureg, temp[0], ureg_src(temp[0])); + ureg_MUL(ureg, temp[2], ureg_src(temp[1]), ureg_src(temp[0])); + ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]); + +} + + +static INLINE void +pattern( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_XY), + in[0]); + ureg_MOV(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_Z), + ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); + ureg_DP3(ureg, temp[1], constant[2], ureg_src(temp[0])); + ureg_DP3(ureg, temp[2], constant[3], ureg_src(temp[0])); + ureg_DP3(ureg, temp[3], constant[4], ureg_src(temp[0])); + ureg_RCP(ureg, temp[3], ureg_src(temp[3])); + ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3])); + ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3])); + ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_X), ureg_src(temp[1])); + ureg_MOV(ureg, ureg_writemask(temp[4], TGSI_WRITEMASK_Y), ureg_src(temp[2])); + ureg_RCP(ureg, temp[0], + ureg_swizzle(constant[1], + TGSI_SWIZZLE_Z, + TGSI_SWIZZLE_W, + TGSI_SWIZZLE_Z, + TGSI_SWIZZLE_W)); + ureg_MOV(ureg, temp[1], ureg_src(temp[4])); + ureg_MUL(ureg, + ureg_writemask(temp[1], TGSI_WRITEMASK_X), + ureg_src(temp[1]), + ureg_src(temp[0])); + ureg_MUL(ureg, + ureg_writemask(temp[1], TGSI_WRITEMASK_Y), + ureg_src(temp[1]), + ureg_src(temp[0])); + ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, ureg_src(temp[1]), sampler[0]); +} + +static INLINE void +mask( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct 
ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[1]); + ureg_MUL(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_W), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_MOV(ureg, *out, ureg_src(temp[0])); +} + +static INLINE void +image_normal( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, in[1], sampler[3]); +} + + +static INLINE void +image_multiply( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); + ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); +} + + +static INLINE void +image_stencil( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]); + ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1])); +} + +#define EXTENDED_BLENDER_OVER_FUNC \ + ureg_SUB(ureg, temp[3], \ + ureg_scalar(constant[1], TGSI_SWIZZLE_Y), \ + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); \ + ureg_SUB(ureg, temp[3], \ + ureg_scalar(constant[1], TGSI_SWIZZLE_Y), \ + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); \ + ureg_MUL(ureg, temp[3], ureg_src(temp[0]), ureg_src(temp[3])); \ + ureg_MUL(ureg, temp[4], ureg_src(temp[1]), ureg_src(temp[4])); \ + ureg_ADD(ureg, temp[3], ureg_src(temp[3]), ureg_src(temp[4])); + + +static INLINE void +blend_multiply( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, 
in[0], sampler[2]); + EXTENDED_BLENDER_OVER_FUNC + ureg_MUL(ureg, temp[4], ureg_src(temp[0]), ureg_src(temp[1])); + ureg_ADD(ureg, temp[1], ureg_src(temp[4]), ureg_src(temp[3])); + + ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W), + ureg_src(temp[3]), ureg_src(temp[2])); + + ureg_MOV(ureg, *out, ureg_src(temp[1])); +} + +static INLINE void +blend_screen( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); + ureg_ADD(ureg, temp[3], ureg_src(temp[0]), ureg_src(temp[1])); + ureg_MUL(ureg, temp[2], ureg_src(temp[0]), ureg_src(temp[1])); + ureg_SUB(ureg, *out, ureg_src(temp[3]), ureg_src(temp[2])); +} + +static INLINE void +blend_darken( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); + EXTENDED_BLENDER_OVER_FUNC + ureg_MUL(ureg, temp[4], ureg_src(temp[0]), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_MUL(ureg, temp[5], ureg_src(temp[1]), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); + ureg_MIN(ureg, temp[4], ureg_src(temp[4]), ureg_src(temp[5])); + ureg_ADD(ureg, temp[1], ureg_src(temp[3]), ureg_src(temp[4])); + + ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W), + ureg_src(temp[3]), ureg_src(temp[2])); + 
+ ureg_MOV(ureg, *out, ureg_src(temp[1])); +} + +static INLINE void +blend_lighten( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]); + EXTENDED_BLENDER_OVER_FUNC + ureg_MUL(ureg, temp[4], ureg_src(temp[0]), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_MUL(ureg, temp[5], ureg_src(temp[1]), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); + ureg_MAX(ureg, temp[4], ureg_src(temp[4]), ureg_src(temp[5])); + ureg_ADD(ureg, temp[1], ureg_src(temp[3]), ureg_src(temp[4])); + + ureg_MUL(ureg, temp[2], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_ADD(ureg, temp[3], ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W)); + ureg_SUB(ureg, ureg_writemask(temp[1], TGSI_WRITEMASK_W), + ureg_src(temp[3]), ureg_src(temp[2])); + + ureg_MOV(ureg, *out, ureg_src(temp[1])); +} + +static INLINE void +premultiply( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_MUL(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ), + ureg_src(temp[0]), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W)); +} + +static INLINE void +unpremultiply( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_TEX(ureg, temp[0], TGSI_TEXTURE_2D, in[0], sampler[1]); +} + + +static INLINE void +color_bw( struct ureg_program *ureg, + struct ureg_dst *out, + struct ureg_src *in, + struct ureg_src *sampler, + struct ureg_dst *temp, + struct ureg_src *constant) +{ + ureg_ADD(ureg, temp[1], + ureg_scalar(constant[1], TGSI_SWIZZLE_Y), + ureg_scalar(constant[1], TGSI_SWIZZLE_Y)); + ureg_RCP(ureg, temp[2], 
ureg_src(temp[1])); + ureg_ADD(ureg, temp[1], + ureg_scalar(constant[1], TGSI_SWIZZLE_Y), + ureg_src(temp[2])); + ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Y)); + ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Z), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X)); + ureg_SGE(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ), + ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_X), + ureg_src(temp[1])); + ureg_SGE(ureg, + ureg_writemask(temp[0], TGSI_WRITEMASK_W), + ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W), + ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_Y)); + ureg_MOV(ureg, *out, ureg_src(temp[0])); +} struct shader_asm_info { VGint id; - VGint num_tokens; - const char * txt; + ureg_func func; VGboolean needs_position; @@ -203,44 +412,44 @@ struct shader_asm_info { static const struct shader_asm_info shaders_asm[] = { /* fills */ - {VEGA_SOLID_FILL_SHADER, 40, solid_fill_asm, + {VEGA_SOLID_FILL_SHADER, solid_fill, VG_FALSE, 0, 1, 0, 0, 0, 0}, - {VEGA_LINEAR_GRADIENT_SHADER, 200, linear_grad_asm, + {VEGA_LINEAR_GRADIENT_SHADER, linear_grad, VG_TRUE, 0, 5, 0, 1, 0, 5}, - {VEGA_RADIAL_GRADIENT_SHADER, 200, radial_grad_asm, + {VEGA_RADIAL_GRADIENT_SHADER, radial_grad, VG_TRUE, 0, 5, 0, 1, 0, 6}, - {VEGA_PATTERN_SHADER, 100, pattern_asm, + {VEGA_PATTERN_SHADER, pattern, VG_TRUE, 1, 4, 0, 1, 0, 5}, /* image draw modes */ - {VEGA_IMAGE_NORMAL_SHADER, 200, image_normal_asm, + {VEGA_IMAGE_NORMAL_SHADER, image_normal, VG_TRUE, 0, 0, 3, 1, 0, 0}, - {VEGA_IMAGE_MULTIPLY_SHADER, 200, image_multiply_asm, + {VEGA_IMAGE_MULTIPLY_SHADER, image_multiply, VG_TRUE, 0, 0, 3, 1, 0, 2}, - {VEGA_IMAGE_STENCIL_SHADER, 200, image_stencil_asm, + {VEGA_IMAGE_STENCIL_SHADER, image_stencil, VG_TRUE, 0, 0, 3, 1, 0, 2}, - {VEGA_MASK_SHADER, 100, mask_asm, + {VEGA_MASK_SHADER, mask, VG_TRUE, 0, 0, 1, 1, 0, 2}, /* extra blend modes 
*/ - {VEGA_BLEND_MULTIPLY_SHADER, 200, blend_multiply_asm, + {VEGA_BLEND_MULTIPLY_SHADER, blend_multiply, VG_TRUE, 1, 1, 2, 1, 0, 5}, - {VEGA_BLEND_SCREEN_SHADER, 200, blend_screen_asm, + {VEGA_BLEND_SCREEN_SHADER, blend_screen, VG_TRUE, 0, 0, 2, 1, 0, 4}, - {VEGA_BLEND_DARKEN_SHADER, 200, blend_darken_asm, + {VEGA_BLEND_DARKEN_SHADER, blend_darken, VG_TRUE, 1, 1, 2, 1, 0, 6}, - {VEGA_BLEND_LIGHTEN_SHADER, 200, blend_lighten_asm, + {VEGA_BLEND_LIGHTEN_SHADER, blend_lighten, VG_TRUE, 1, 1, 2, 1, 0, 6}, /* premultiply */ - {VEGA_PREMULTIPLY_SHADER, 100, premultiply_asm, + {VEGA_PREMULTIPLY_SHADER, premultiply, VG_FALSE, 0, 0, 0, 0, 0, 1}, - {VEGA_UNPREMULTIPLY_SHADER, 100, unpremultiply_asm, + {VEGA_UNPREMULTIPLY_SHADER, unpremultiply, VG_FALSE, 0, 0, 0, 0, 0, 1}, /* color transform to black and white */ - {VEGA_BW_SHADER, 150, color_bw_asm, + {VEGA_BW_SHADER, color_bw, VG_FALSE, 1, 1, 0, 0, 0, 3}, }; #endif diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 1112ad9839d..2e10965be4f 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -37,7 +37,7 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_blit.h" #include "util/u_format.h" #include "util/u_tile.h" diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c index 42300bb6d57..467b95b7519 100644 --- a/src/gallium/state_trackers/vega/mask.c +++ b/src/gallium/state_trackers/vega/mask.c @@ -35,7 +35,7 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" @@ -217,7 +217,7 @@ static void setup_mask_framebuffer(struct pipe_surface *surf, static void setup_mask_operation(VGMaskOperation operation) { struct vg_context *ctx = vg_current_context(); - struct pipe_constant_buffer *cbuf = 
&ctx->mask.cbuf; + struct pipe_buffer **cbuf = &ctx->mask.cbuf; const VGint param_bytes = 4 * sizeof(VGfloat); const VGfloat ones[4] = {1.f, 1.f, 1.f, 1.f}; void *shader = 0; @@ -225,17 +225,17 @@ static void setup_mask_operation(VGMaskOperation operation) /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); - cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 1, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, + *cbuf = pipe_buffer_create(ctx->pipe->screen, 1, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, ones); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); switch (operation) { case VG_UNION_MASK: { if (!ctx->mask.union_fs) { @@ -320,22 +320,22 @@ static void setup_mask_samplers(struct pipe_texture *umask) static void setup_mask_fill(const VGfloat color[4]) { struct vg_context *ctx = vg_current_context(); - struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf; + struct pipe_buffer **cbuf = &ctx->mask.cbuf; const VGint param_bytes = 4 * sizeof(VGfloat); /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. 
*/ - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); - cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 1, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, 0, param_bytes, color); + *cbuf = pipe_buffer_create(ctx->pipe->screen, 1, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, color); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); cso_set_fragment_shader_handle(ctx->cso_context, shaders_cache_fill(ctx->sc, VEGA_SOLID_FILL_SHADER)); @@ -354,15 +354,12 @@ static void setup_mask_blend() struct pipe_blend_state blend; memset(&blend, 0, sizeof(struct pipe_blend_state)); - blend.blend_enable = 1; - blend.colormask |= PIPE_MASK_R; - blend.colormask |= PIPE_MASK_G; - blend.colormask |= PIPE_MASK_B; - blend.colormask |= PIPE_MASK_A; - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].blend_enable = 0; + blend.rt[0].colormask = PIPE_MASK_RGBA; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; cso_set_blend(ctx->cso_context, &blend); } diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index cc73771d358..3405d635f0c 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -32,7 +32,7 @@ #include "st_inlines.h" #include "pipe/p_compiler.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" @@ -77,7 +77,8 @@ struct vg_paint { 
struct pipe_sampler_state sampler; } pattern; - struct pipe_constant_buffer cbuf; + /* XXX next 3 all unneded? */ + struct pipe_buffer *cbuf; struct pipe_shader_state fs_state; void *fs; }; diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c index b6d282d803b..f56ea0c8b44 100644 --- a/src/gallium/state_trackers/vega/polygon.c +++ b/src/gallium/state_trackers/vega/polygon.c @@ -37,7 +37,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_screen.h" #include "util/u_draw_quad.h" @@ -248,12 +248,12 @@ VGboolean polygon_is_closed(struct polygon *p) static void set_blend_for_fill(struct pipe_blend_state *blend) { memset(blend, 0, sizeof(struct pipe_blend_state)); - blend->colormask = 0; /*disable colorwrites*/ + blend->rt[0].colormask = 0; /*disable colorwrites*/ - blend->rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend->alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; - blend->alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; } static void draw_polygon(struct vg_context *ctx, @@ -293,6 +293,7 @@ static void draw_polygon(struct vg_context *ctx, /* tell pipe about the vertex attributes */ velement.src_offset = 0; + velement.instance_divisor = 0; velement.vertex_buffer_index = 0; velement.src_format = PIPE_FORMAT_R32G32_FLOAT; velement.nr_components = COMPONENTS; diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index 64e3a7c5453..05620efa9c0 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -30,7 +30,7 @@ #include 
"pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_screen.h" #include "pipe/p_shader_tokens.h" @@ -317,11 +317,11 @@ void renderer_copy_texture(struct renderer *ctx, { struct pipe_blend_state blend; memset(&blend, 0, sizeof(blend)); - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.colormask = PIPE_MASK_RGBA; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].colormask = PIPE_MASK_RGBA; cso_set_blend(ctx->cso, &blend); } @@ -486,11 +486,11 @@ void renderer_copy_surface(struct renderer *ctx, { struct pipe_blend_state blend; memset(&blend, 0, sizeof(blend)); - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.colormask = PIPE_MASK_RGBA; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].colormask = PIPE_MASK_RGBA; cso_set_blend(ctx->cso, &blend); } diff --git a/src/gallium/state_trackers/vega/shader.c b/src/gallium/state_trackers/vega/shader.c index d9074a377b3..0e71a507bff 100644 --- a/src/gallium/state_trackers/vega/shader.c +++ b/src/gallium/state_trackers/vega/shader.c @@ -35,7 +35,7 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #define MAX_CONSTANTS 20 @@ -51,7 +51,7 @@ struct shader { VGImageMode image_mode; float 
constants[MAX_CONSTANTS]; - struct pipe_constant_buffer cbuf; + struct pipe_buffer *cbuf; struct pipe_shader_state fs_state; void *fs; }; @@ -96,25 +96,25 @@ static void setup_constant_buffer(struct shader *shader) { struct vg_context *ctx = shader->context; struct pipe_context *pipe = shader->context->pipe; - struct pipe_constant_buffer *cbuf = &shader->cbuf; + struct pipe_buffer **cbuf = &shader->cbuf; VGint param_bytes = paint_constant_buffer_size(shader->paint); float temp_buf[MAX_CONSTANTS]; assert(param_bytes <= sizeof(temp_buf)); paint_fill_constant_buffer(shader->paint, temp_buf); - if (cbuf->buffer == NULL || + if (*cbuf == NULL || memcmp(temp_buf, shader->constants, param_bytes) != 0) { - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); memcpy(shader->constants, temp_buf, param_bytes); - cbuf->buffer = pipe_user_buffer_create(pipe->screen, - &shader->constants, - sizeof(shader->constants)); + *cbuf = pipe_user_buffer_create(pipe->screen, + &shader->constants, + sizeof(shader->constants)); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); } static VGint blend_bind_samplers(struct vg_context *ctx, @@ -135,8 +135,8 @@ static VGint blend_bind_samplers(struct vg_context *ctx, textures[2] = stfb->blend_texture; if (!samplers[0] || !textures[0]) { - samplers[1] = samplers[2]; - textures[1] = textures[2]; + samplers[0] = samplers[2]; + textures[0] = textures[2]; } if (!samplers[1] || !textures[1]) { samplers[1] = samplers[0]; diff --git a/src/gallium/state_trackers/vega/shaders_cache.c b/src/gallium/state_trackers/vega/shaders_cache.c index f620075d0bc..f43fe6ee4cb 100644 --- a/src/gallium/state_trackers/vega/shaders_cache.c +++ b/src/gallium/state_trackers/vega/shaders_cache.c @@ -30,7 +30,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include 
"pipe/p_screen.h" #include "pipe/p_shader_tokens.h" @@ -123,17 +123,23 @@ static INLINE VGint range_max(VGint max, VGint current) return MAX2(max, current); } -static void -create_preamble(char *txt, - const struct shader_asm_info *shaders[SHADER_STAGES], - int num_shaders) +static void * +combine_shaders(const struct shader_asm_info *shaders[SHADER_STAGES], int num_shaders, + struct pipe_context *pipe, + struct pipe_shader_state *shader) { VGboolean declare_input = VG_FALSE; VGint start_const = -1, end_const = 0; VGint start_temp = -1, end_temp = 0; VGint start_sampler = -1, end_sampler = 0; - VGint i; + VGint i, current_shader = 0; VGint num_consts, num_temps, num_samplers; + struct ureg_program *ureg; + struct ureg_src in[2]; + struct ureg_src *sampler = NULL; + struct ureg_src *constant = NULL; + struct ureg_dst out, *temp = NULL; + void *p = NULL; for (i = 0; i < num_shaders; ++i) { if (shaders[i]->num_consts) @@ -158,99 +164,94 @@ create_preamble(char *txt, if (start_temp < 0) start_temp = 0; if (start_sampler < 0) - start_sampler = 0; + start_sampler = 0; num_consts = end_const - start_const; num_temps = end_temp - start_temp; num_samplers = end_sampler - start_sampler; - /* end exclusive */ - --end_const; - --end_temp; - --end_sampler; - sprintf(txt, "FRAG\n"); + ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!ureg) + return NULL; if (declare_input) { - sprintf(txt + strlen(txt), "DCL IN[0], POSITION, LINEAR\n"); - sprintf(txt + strlen(txt), "DCL IN[1], GENERIC[0], PERSPECTIVE\n"); + in[0] = ureg_DECL_fs_input(ureg, + TGSI_SEMANTIC_POSITION, + 0, + TGSI_INTERPOLATE_LINEAR); + in[1] = ureg_DECL_fs_input(ureg, + TGSI_SEMANTIC_GENERIC, + 0, + TGSI_INTERPOLATE_PERSPECTIVE); } /* we always have a color output */ - sprintf(txt + strlen(txt), "DCL OUT[0], COLOR, CONSTANT\n"); - - if (num_consts > 1) - sprintf(txt + strlen(txt), "DCL CONST[%d..%d], CONSTANT\n", start_const, end_const); - else if (num_consts == 1) - sprintf(txt + strlen(txt), "DCL CONST[%d], 
CONSTANT\n", start_const); - - if (num_temps > 1) - sprintf(txt + strlen(txt), "DCL TEMP[%d..%d], CONSTANT\n", start_temp, end_temp); - else if (num_temps > 1) - sprintf(txt + strlen(txt), "DCL TEMP[%d], CONSTANT\n", start_temp); - - if (num_samplers > 1) - sprintf(txt + strlen(txt), "DCL SAMP[%d..%d], CONSTANT\n", start_sampler, end_sampler); - else if (num_samplers == 1) - sprintf(txt + strlen(txt), "DCL SAMP[%d], CONSTANT\n", start_sampler); -} + out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); -static void * -combine_shaders(const struct shader_asm_info *shaders[SHADER_STAGES], int num_shaders, - struct pipe_context *pipe, - struct pipe_shader_state *shader) -{ - char *combined_txt; - int combined_len = MAX_PREAMBLE; - int combined_tokens = 0; - int i = 0; - int current_shader = 0; - int current_len; + if (num_consts >= 1) { + constant = (struct ureg_src *) malloc(sizeof(struct ureg_src) * end_const); + for (i = start_const; i < end_const; i++) { + constant[i] = ureg_DECL_constant(ureg, i); + } - for (i = 0; i < num_shaders; ++i) { - combined_len += strlen(shaders[i]->txt); - combined_tokens += shaders[i]->num_tokens; } - /* add for the %s->TEMP[0] substitutions */ - combined_len += num_shaders * 7 /*TEMP[0]*/ + 4 /*"END\n"*/; - combined_txt = (char*)malloc(combined_len); - combined_txt[0] = '\0'; + if (num_temps >= 1) { + temp = (struct ureg_dst *) malloc(sizeof(struct ureg_dst) * end_temp); + for (i = start_temp; i < end_temp; i++) { + temp[i] = ureg_DECL_temporary(ureg); + } + } - create_preamble(combined_txt, shaders, num_shaders); + if (num_samplers >= 1) { + sampler = (struct ureg_src *) malloc(sizeof(struct ureg_src) * end_sampler); + for (i = start_sampler; i < end_sampler; i++) { + sampler[i] = ureg_DECL_sampler(ureg, i); + } + } while (current_shader < num_shaders) { - const char temp[] = "TEMP[0]"; - const char out[] = "OUT[0]"; - const char *subst = temp; - - current_len = strlen(combined_txt); - - /* if the last shader then output */ - if 
(current_shader + 1 == num_shaders) - subst = out; - - snprintf(combined_txt + current_len, - combined_len - current_len, - shaders[current_shader]->txt, - subst); - ++current_shader; + if ((current_shader + 1) == num_shaders) { + shaders[current_shader]->func(ureg, + &out, + in, + sampler, + temp, + constant); + } else { + shaders[current_shader]->func(ureg, + &temp[0], + in, + sampler, + temp, + constant); + } + current_shader++; } + ureg_END(ureg); - current_len = strlen(combined_txt); - snprintf(combined_txt + current_len, - combined_len - current_len, - "END\n"); + shader->tokens = ureg_finalize(ureg); + if(!shader->tokens) + return NULL; - debug_printf("Combined shader is : \n%s\n", - combined_txt); + p = pipe->create_fs_state(pipe, shader); + ureg_destroy(ureg); - shader->tokens = tokens_from_assembly( - combined_txt, combined_tokens); + if (num_temps >= 1) { + for (i = start_temp; i < end_temp; i++) { + ureg_release_temporary(ureg, temp[i]); + } + } - free(combined_txt); + if (temp) + free(temp); + if (constant) + free(constant); + if (sampler) + free(sampler); - return pipe->create_fs_state(pipe, shader); + return p; } static void * diff --git a/src/gallium/state_trackers/vega/st_inlines.h b/src/gallium/state_trackers/vega/st_inlines.h index 610755e0636..419151c3aee 100644 --- a/src/gallium/state_trackers/vega/st_inlines.h +++ b/src/gallium/state_trackers/vega/st_inlines.h @@ -38,7 +38,7 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_state.h" static INLINE struct pipe_transfer * diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c index 00d23f5c227..426bf9bc62b 100644 --- a/src/gallium/state_trackers/vega/vg_context.c +++ b/src/gallium/state_trackers/vega/vg_context.c @@ -34,7 +34,7 @@ #include "st_inlines.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" 
#include "pipe/p_shader_tokens.h" #include "cso_cache/cso_context.h" @@ -122,8 +122,8 @@ struct vg_context * vg_create_context(struct pipe_context *pipe, void vg_destroy_context(struct vg_context *ctx) { - struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf; - struct pipe_constant_buffer *vsbuf = &ctx->vs_const_buffer; + struct pipe_buffer **cbuf = &ctx->mask.cbuf; + struct pipe_buffer **vsbuf = &ctx->vs_const_buffer; util_destroy_blit(ctx->blit); renderer_destroy(ctx->renderer); @@ -131,11 +131,11 @@ void vg_destroy_context(struct vg_context *ctx) shader_destroy(ctx->shader); paint_destroy(ctx->default_paint); - if (cbuf && cbuf->buffer) - pipe_buffer_reference(&cbuf->buffer, NULL); + if (*cbuf) + pipe_buffer_reference(cbuf, NULL); - if (vsbuf && vsbuf->buffer) - pipe_buffer_reference(&vsbuf->buffer, NULL); + if (*vsbuf) + pipe_buffer_reference(vsbuf, NULL); if (ctx->clear.fs) { cso_delete_fragment_shader(ctx->cso_context, ctx->clear.fs); @@ -252,7 +252,7 @@ static void update_clip_state(struct vg_context *ctx) ctx->pipe->clear(ctx->pipe, PIPE_CLEAR_DEPTHSTENCIL, NULL, 1.0, 0); /* disable color writes */ - blend->colormask = 0; /*disable colorwrites*/ + blend->rt[0].colormask = 0; /*disable colorwrites*/ cso_set_blend(ctx->cso_context, blend); /* enable scissoring */ @@ -286,7 +286,6 @@ static void update_clip_state(struct vg_context *ctx) renderer_draw_quad(ctx->renderer, minx, miny, maxx, maxy, 0.0f); } - blend->colormask = 1; /*enable colorwrites*/ cso_restore_blend(ctx->cso_context); cso_restore_fragment_shader(ctx->cso_context); @@ -301,57 +300,56 @@ void vg_validate_state(struct vg_context *ctx) if ((ctx->state.dirty & BLEND_DIRTY)) { struct pipe_blend_state *blend = &ctx->state.g3d.blend; memset(blend, 0, sizeof(struct pipe_blend_state)); - blend->blend_enable = 1; - blend->colormask |= PIPE_MASK_R; - blend->colormask |= PIPE_MASK_G; - blend->colormask |= PIPE_MASK_B; - blend->colormask |= PIPE_MASK_A; + blend->rt[0].blend_enable = 1; + 
blend->rt[0].colormask = PIPE_MASK_RGBA; switch (ctx->state.vg.blend_mode) { case VG_BLEND_SRC: - blend->rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend->alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend->alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend->rt[0].blend_enable = 0; break; case VG_BLEND_SRC_OVER: - blend->rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; - blend->alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; - blend->alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; + blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; break; case VG_BLEND_DST_OVER: - blend->rgb_src_factor = PIPE_BLENDFACTOR_INV_DST_ALPHA; - blend->alpha_src_factor = PIPE_BLENDFACTOR_INV_DST_ALPHA; - blend->rgb_dst_factor = PIPE_BLENDFACTOR_DST_ALPHA; - blend->alpha_dst_factor = PIPE_BLENDFACTOR_DST_ALPHA; + blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_INV_DST_ALPHA; + blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_INV_DST_ALPHA; + blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_DST_ALPHA; + blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_DST_ALPHA; break; case VG_BLEND_SRC_IN: - blend->rgb_src_factor = PIPE_BLENDFACTOR_DST_ALPHA; - blend->alpha_src_factor = PIPE_BLENDFACTOR_DST_ALPHA; - blend->rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend->alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_DST_ALPHA; + blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_DST_ALPHA; + blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + 
blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; break; case VG_BLEND_DST_IN: - blend->rgb_src_factor = PIPE_BLENDFACTOR_ZERO; - blend->alpha_src_factor = PIPE_BLENDFACTOR_ZERO; - blend->rgb_dst_factor = PIPE_BLENDFACTOR_SRC_ALPHA; - blend->alpha_dst_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ZERO; + blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ZERO; + blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_SRC_ALPHA; + blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_SRC_ALPHA; break; case VG_BLEND_MULTIPLY: case VG_BLEND_SCREEN: case VG_BLEND_DARKEN: case VG_BLEND_LIGHTEN: - blend->rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend->alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend->alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend->rt[0].blend_enable = 0; break; case VG_BLEND_ADDITIVE: - blend->rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend->alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rgb_dst_factor = PIPE_BLENDFACTOR_ONE; - blend->alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; break; default: assert(!"not implemented blend mode"); @@ -371,20 +369,20 @@ void vg_validate_state(struct vg_context *ctx) 2.f/fb->width, 2.f/fb->height, 1, 1, -1, -1, 0, 0 }; - struct pipe_constant_buffer *cbuf = &ctx->vs_const_buffer; + struct pipe_buffer **cbuf = &ctx->vs_const_buffer; vg_set_viewport(ctx, VEGA_Y0_BOTTOM); - pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 16, + pipe_buffer_reference(cbuf, NULL); + 
*cbuf = pipe_buffer_create(ctx->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, vs_consts); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, *cbuf); } if ((ctx->state.dirty & VS_DIRTY)) { cso_set_vertex_shader_handle(ctx->cso_context, diff --git a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h index ccc8889c8c5..bc88c8d139d 100644 --- a/src/gallium/state_trackers/vega/vg_context.h +++ b/src/gallium/state_trackers/vega/vg_context.h @@ -50,7 +50,7 @@ struct st_renderbuffer { }; struct st_framebuffer { - VGint init_width, init_height; + VGint width, height; struct st_renderbuffer *strb; struct st_renderbuffer *dsrb; @@ -113,7 +113,7 @@ struct vg_context } clear; struct { - struct pipe_constant_buffer cbuf; + struct pipe_buffer *cbuf; struct pipe_sampler_state sampler; struct vg_shader *union_fs; @@ -135,7 +135,7 @@ struct vg_context struct pipe_sampler_state blend_sampler; struct { - struct pipe_constant_buffer buffer; + struct pipe_buffer *buffer; void *color_matrix_fs; } filter; struct vg_paint *default_paint; @@ -145,7 +145,7 @@ struct vg_context struct vg_shader *plain_vs; struct vg_shader *clear_vs; struct vg_shader *texture_vs; - struct pipe_constant_buffer vs_const_buffer; + struct pipe_buffer *vs_const_buffer; }; struct vg_object { diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c index e5039132758..a94dfb160c9 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.c +++ b/src/gallium/state_trackers/vega/vg_tracker.c @@ -29,13 +29,16 @@ #include "mask.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_screen.h" #include "util/u_format.h" #include 
"util/u_memory.h" #include "util/u_math.h" #include "util/u_rect.h" +/* advertise OpenVG support */ +PUBLIC const int st_api_OpenVG = 1; + static struct pipe_texture * create_texture(struct pipe_context *pipe, enum pipe_format format, VGint width, VGint height) @@ -190,8 +193,8 @@ struct st_framebuffer * st_create_framebuffer(const void *visual, */ stfb->alpha_mask = 0; - stfb->init_width = width; - stfb->init_height = height; + stfb->width = width; + stfb->height = height; stfb->privateData = privateData; } @@ -279,11 +282,14 @@ void st_resize_framebuffer(struct st_framebuffer *stfb, /* If this is a noop, exit early and don't do the clear, etc below. */ - if (strb->width == width && - strb->height == height && + if (stfb->width == width && + stfb->height == height && state->zsbuf) return; + stfb->width = width; + stfb->height = height; + if (strb->width != width || strb->height != height) st_renderbuffer_alloc_storage(ctx, strb, width, height); @@ -368,14 +374,15 @@ void st_unreference_framebuffer(struct st_framebuffer *stfb) /* FIXME */ } -void st_make_current(struct vg_context *st, - struct st_framebuffer *draw, - struct st_framebuffer *read) +boolean st_make_current(struct vg_context *st, + struct st_framebuffer *draw, + struct st_framebuffer *read) { vg_set_current_context(st); if (st) { st->draw_buffer = draw; } + return VG_TRUE; } struct vg_context *st_get_current(void) @@ -425,3 +432,8 @@ int st_unbind_texture_surface(struct pipe_surface *ps, int target, int level) { return 0; } + +st_proc st_get_proc_address(const char *procname) +{ + return NULL; +} diff --git a/src/gallium/state_trackers/vega/vg_tracker.h b/src/gallium/state_trackers/vega/vg_tracker.h index 0f0c27f4550..c1196954a76 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.h +++ b/src/gallium/state_trackers/vega/vg_tracker.h @@ -99,9 +99,9 @@ PUBLIC void st_unreference_framebuffer(struct st_framebuffer *stfb); PUBLIC -void st_make_current(struct vg_context *st, - struct st_framebuffer 
*draw, - struct st_framebuffer *read); +boolean st_make_current(struct vg_context *st, + struct st_framebuffer *draw, + struct st_framebuffer *read); PUBLIC struct vg_context *st_get_current(void); diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c index f2f0264844a..0785d2c6b85 100644 --- a/src/gallium/state_trackers/wgl/stw_context.c +++ b/src/gallium/state_trackers/wgl/stw_context.c @@ -34,11 +34,6 @@ #include "state_tracker/st_context.h" #include "state_tracker/st_public.h" -#ifdef DEBUG -#include "trace/tr_screen.h" -#include "trace/tr_context.h" -#endif - #include "stw_icd.h" #include "stw_device.h" #include "stw_winsys.h" @@ -152,7 +147,6 @@ DrvCreateLayerContext( const struct stw_pixelformat_info *pfi; GLvisual visual; struct stw_context *ctx = NULL; - struct pipe_screen *screen = NULL; struct pipe_context *pipe = NULL; if(!stw_dev) @@ -175,28 +169,12 @@ DrvCreateLayerContext( ctx->hdc = hdc; ctx->iPixelFormat = iPixelFormat; - screen = stw_dev->screen; - -#ifdef DEBUG - /* Unwrap screen */ - if(stw_dev->trace_running) - screen = trace_screen(screen)->screen; -#endif - - pipe = stw_dev->stw_winsys->create_context( screen ); + /* priv == hdc, pass to stw_flush_frontbuffer as context_private + */ + pipe = stw_dev->screen->context_create( stw_dev->screen, hdc ); if (pipe == NULL) goto no_pipe; -#ifdef DEBUG - /* Wrap context */ - if(stw_dev->trace_running) - pipe = trace_context_create(stw_dev->screen, pipe); -#endif - - /* pass to stw_flush_frontbuffer as context_private */ - assert(!pipe->priv); - pipe->priv = hdc; - ctx->st = st_create_context( pipe, &visual, NULL ); if (ctx->st == NULL) goto no_st_ctx; diff --git a/src/gallium/state_trackers/wgl/stw_device.h b/src/gallium/state_trackers/wgl/stw_device.h index 0bf3b0da825..a83841f6b7d 100644 --- a/src/gallium/state_trackers/wgl/stw_device.h +++ b/src/gallium/state_trackers/wgl/stw_device.h @@ -30,7 +30,7 @@ #include "pipe/p_compiler.h" -#include 
"pipe/p_thread.h" +#include "os/os_thread.h" #include "util/u_handle_table.h" #include "stw_icd.h" #include "stw_pixelformat.h" diff --git a/src/gallium/state_trackers/wgl/stw_ext_gallium.c b/src/gallium/state_trackers/wgl/stw_ext_gallium.c index fb30ec5dba9..8dd63f124ad 100644 --- a/src/gallium/state_trackers/wgl/stw_ext_gallium.c +++ b/src/gallium/state_trackers/wgl/stw_ext_gallium.c @@ -48,32 +48,8 @@ wglGetGalliumScreenMESA(void) struct pipe_context * APIENTRY wglCreateGalliumContextMESA(void) { - struct pipe_screen *screen = NULL; - struct pipe_context *pipe = NULL; - if(!stw_dev) return NULL; - screen = stw_dev->screen; - -#ifdef DEBUG - /* Unwrap screen */ - if(stw_dev->trace_running) - screen = trace_screen(screen)->screen; -#endif - - pipe = stw_dev->stw_winsys->create_context( screen ); - if (pipe == NULL) - goto no_pipe; - -#ifdef DEBUG - /* Wrap context */ - if(stw_dev->trace_running) - pipe = trace_context_create(stw_dev->screen, pipe); -#endif - - return pipe; - -no_pipe: - return NULL; + return stw_dev->screen->context_create( stw_dev->screen, NULL ); } diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.h b/src/gallium/state_trackers/wgl/stw_framebuffer.h index b80d168a7ce..08cc4973bce 100644 --- a/src/gallium/state_trackers/wgl/stw_framebuffer.h +++ b/src/gallium/state_trackers/wgl/stw_framebuffer.h @@ -32,7 +32,7 @@ #include "main/mtypes.h" -#include "pipe/p_thread.h" +#include "os/os_thread.h" struct pipe_surface; struct stw_pixelformat_info; diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c index 54cc3614129..7d4c2430b0c 100644 --- a/src/gallium/state_trackers/wgl/stw_pixelformat.c +++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c @@ -95,8 +95,6 @@ stw_pf_depth_stencil[] = { { PIPE_FORMAT_Z24X8_UNORM, {24, 0} }, { PIPE_FORMAT_X8Z24_UNORM, {24, 0} }, { PIPE_FORMAT_Z16_UNORM, {16, 0} }, - /* pure stencil */ - { PIPE_FORMAT_S8_UNORM, { 0, 8} }, /* combined depth-stencil */ 
{ PIPE_FORMAT_S8Z24_UNORM, {24, 8} }, { PIPE_FORMAT_Z24S8_UNORM, {24, 8} } @@ -220,7 +218,8 @@ stw_pixelformat_init( void ) const struct stw_pf_color_info *color = &stw_pf_color[j]; if(!screen->is_format_supported(screen, color->format, PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) + PIPE_TEXTURE_USAGE_RENDER_TARGET | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET, 0)) continue; for(k = 0; k < Elements(stw_pf_doublebuffer); ++k) { diff --git a/src/gallium/state_trackers/wgl/stw_winsys.h b/src/gallium/state_trackers/wgl/stw_winsys.h index 1de6e906d0d..270fad56a19 100644 --- a/src/gallium/state_trackers/wgl/stw_winsys.h +++ b/src/gallium/state_trackers/wgl/stw_winsys.h @@ -43,9 +43,6 @@ struct stw_winsys struct pipe_screen * (*create_screen)( void ); - struct pipe_context * - (*create_context)( struct pipe_screen *screen ); - /** * Present the color buffer to the window associated with the device context. */ diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index 1c248a629e6..c50873c1508 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -4,10 +4,7 @@ #include "xorg_exa_tgsi.h" #include "cso_cache/cso_context.h" -#include "util/u_draw_quad.h" -#include "util/u_math.h" -#include "pipe/p_inlines.h" /*XXX also in Xrender.h but the including it here breaks compilition */ #define XFixedToDouble(f) (((double) (f)) / 65536.) 
@@ -220,13 +217,13 @@ bind_blend_state(struct exa_context *exa, int op, blend_for_op(&blend_opt, op, pSrcPicture, pMaskPicture, pDstPicture); memset(&blend, 0, sizeof(struct pipe_blend_state)); - blend.blend_enable = 1; - blend.colormask |= PIPE_MASK_RGBA; + blend.rt[0].blend_enable = 1; + blend.rt[0].colormask = PIPE_MASK_RGBA; - blend.rgb_src_factor = blend_opt.rgb_src; - blend.alpha_src_factor = blend_opt.rgb_src; - blend.rgb_dst_factor = blend_opt.rgb_dst; - blend.alpha_dst_factor = blend_opt.rgb_dst; + blend.rt[0].rgb_src_factor = blend_opt.rgb_src; + blend.rt[0].alpha_src_factor = blend_opt.rgb_src; + blend.rt[0].rgb_dst_factor = blend_opt.rgb_dst; + blend.rt[0].alpha_dst_factor = blend_opt.rgb_dst; cso_set_blend(exa->renderer->cso, &blend); } diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index 650d2c0d1db..221ce772af9 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -49,8 +49,7 @@ #include <X11/extensions/dpms.h> #endif -#include "pipe/p_inlines.h" -#include "util/u_format.h" +#include "util/u_inlines.h" #include "util/u_rect.h" #ifdef HAVE_LIBKMS @@ -243,7 +242,11 @@ crtc_load_cursor_argb_kms(xf86CrtcPtr crtc, CARD32 * image) unsigned attr[8]; attr[0] = KMS_BO_TYPE; +#ifdef KMS_BO_TYPE_CURSOR_64X64_A8R8G8B8 + attr[1] = KMS_BO_TYPE_CURSOR_64X64_A8R8G8B8; +#else attr[1] = KMS_BO_TYPE_CURSOR; +#endif attr[2] = KMS_WIDTH; attr[3] = 64; attr[4] = KMS_HEIGHT; diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index fd82f4fa1d1..7457fe1c6d6 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -38,15 +38,17 @@ #include "dri2.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" -#include "util/u_rect.h" /* Make all the #if cases in the code esier to read */ -/* XXX can it be set to 
1? */ #ifndef DRI2INFOREC_VERSION -#define DRI2INFOREC_VERSION 0 +#define DRI2INFOREC_VERSION 1 +#endif + +#if DRI2INFOREC_VERSION == 2 +static Bool set_format_in_do_create_buffer; #endif typedef struct { @@ -147,7 +149,9 @@ dri2_do_create_buffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int form buffer->driverPrivate = private; buffer->flags = 0; /* not tiled */ #if DRI2INFOREC_VERSION == 2 - ((DRI2Buffer2Ptr)buffer)->format = 0; + /* ABI forwards/backwards compatibility */ + if (set_format_in_do_create_buffer) + ((DRI2Buffer2Ptr)buffer)->format = 0; #elif DRI2INFOREC_VERSION >= 3 buffer->format = 0; #endif @@ -211,7 +215,9 @@ dri2_destroy_buffer(DrawablePtr pDraw, DRI2Buffer2Ptr buffer) xfree(buffer); } -#else /* DRI2INFOREC_VERSION < 2 */ +#endif /* DRI2INFOREC_VERSION >= 2 */ + +#if DRI2INFOREC_VERSION <= 2 static DRI2BufferPtr dri2_create_buffers(DrawablePtr pDraw, unsigned int *attachments, int count) @@ -261,7 +267,7 @@ dri2_destroy_buffers(DrawablePtr pDraw, DRI2BufferPtr buffers, int count) } } -#endif /* DRI2INFOREC_VERSION >= 2 */ +#endif /* DRI2INFOREC_VERSION <= 2 */ static void dri2_copy_region(DrawablePtr pDraw, RegionPtr pRegion, @@ -369,12 +375,19 @@ xorg_dri2_init(ScreenPtr pScreen) ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; modesettingPtr ms = modesettingPTR(pScrn); DRI2InfoRec dri2info; - #if DRI2INFOREC_VERSION >= 2 - dri2info.version = DRI2INFOREC_VERSION; -#else - dri2info.version = 1; + int major, minor; + + if (xf86LoaderCheckSymbol("DRI2Version")) { + DRI2Version(&major, &minor); + } else { + /* Assume version 1.0 */ + major = 1; + minor = 0; + } #endif + + dri2info.version = DRI2INFOREC_VERSION; dri2info.fd = ms->fd; dri2info.driverName = pScrn->driverName; @@ -383,7 +396,22 @@ xorg_dri2_init(ScreenPtr pScreen) #if DRI2INFOREC_VERSION >= 2 dri2info.CreateBuffer = dri2_create_buffer; dri2info.DestroyBuffer = dri2_destroy_buffer; -#else +#endif + + /* For X servers in the 1.6.x series there where two DRI2 version. 
+ * This allows us to build one binary that works on both servers. + */ +#if DRI2INFOREC_VERSION == 2 + if (minor == 0) { + set_format_in_do_create_buffer = FALSE; + dri2info.CreateBuffers = dri2_create_buffers; + dri2info.DestroyBuffers = dri2_destroy_buffers; + } else + set_format_in_do_create_buffer = FALSE; +#endif + + /* For version 1 set these unconditionaly. */ +#if DRI2INFOREC_VERSION == 1 dri2info.CreateBuffers = dri2_create_buffers; dri2info.DestroyBuffers = dri2_destroy_buffers; #endif diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index b02fe68f313..f53a879a14a 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -45,7 +45,6 @@ #include "miscstruct.h" #include "dixstruct.h" #include "xf86xv.h" -#include <X11/extensions/Xv.h> #ifndef XSERVER_LIBPCIACCESS #error "libpciaccess needed" #endif @@ -79,11 +78,13 @@ typedef enum { OPTION_SW_CURSOR, OPTION_2D_ACCEL, + OPTION_DEBUG_FALLBACK, } drv_option_enums; static const OptionInfoRec drv_options[] = { {OPTION_SW_CURSOR, "SWcursor", OPTV_BOOLEAN, {0}, FALSE}, {OPTION_2D_ACCEL, "2DAccel", OPTV_BOOLEAN, {0}, FALSE}, + {OPTION_DEBUG_FALLBACK, "DebugFallback", OPTV_BOOLEAN, {0}, FALSE}, {-1, NULL, OPTV_NONE, {0}, FALSE} }; @@ -111,6 +112,28 @@ xorg_tracker_set_functions(ScrnInfoPtr scrn) scrn->ValidMode = drv_valid_mode; } +Bool +xorg_tracker_have_modesetting(ScrnInfoPtr pScrn, struct pci_device *device) +{ + char *BusID = xalloc(64); + sprintf(BusID, "pci:%04x:%02x:%02x.%d", + device->domain, device->bus, + device->dev, device->func); + + if (drmCheckModesettingSupported(BusID)) { + xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 0, + "Drm modesetting not supported %s\n", BusID); + xfree(BusID); + return FALSE; + } + + xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 0, + "Drm modesetting supported on %s\n", BusID); + + xfree(BusID); + return TRUE; +} + /* * Internal function definitions @@ -206,16 +229,41 @@ 
drv_init_drm(ScrnInfoPtr pScrn) ms->PciInfo->dev, ms->PciInfo->func ); - ms->fd = drmOpen(NULL, BusID); - if (ms->fd < 0) - return FALSE; + ms->api = drm_api_create(); + ms->fd = drmOpen(ms->api ? ms->api->driver_name : NULL, BusID); + xfree(BusID); + + if (ms->fd >= 0) + return TRUE; + + if (ms->api && ms->api->destroy) + ms->api->destroy(ms->api); + + ms->api = NULL; + + return FALSE; } return TRUE; } static Bool +drv_close_drm(ScrnInfoPtr pScrn) +{ + modesettingPtr ms = modesettingPTR(pScrn); + + if (ms->api && ms->api->destroy) + ms->api->destroy(ms->api); + ms->api = NULL; + + drmClose(ms->fd); + ms->fd = -1; + + return TRUE; +} + +static Bool drv_init_resource_management(ScrnInfoPtr pScrn) { modesettingPtr ms = modesettingPTR(pScrn); @@ -229,7 +277,6 @@ drv_init_resource_management(ScrnInfoPtr pScrn) if (ms->screen || ms->kms) return TRUE; - ms->api = drm_api_create(); if (ms->api) { ms->screen = ms->api->create_screen(ms->api, ms->fd, NULL); @@ -269,10 +316,6 @@ drv_close_resource_management(ScrnInfoPtr pScrn) } ms->screen = NULL; - if (ms->api && ms->api->destroy) - ms->api->destroy(ms->api); - ms->api = NULL; - #ifdef HAVE_LIBKMS if (ms->kms) kms_destroy(&ms->kms); @@ -629,10 +672,11 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) xf86SetBlackWhitePixels(pScreen); + ms->accelerate_2d = xf86ReturnOptValBool(ms->Options, OPTION_2D_ACCEL, FALSE); + ms->debug_fallback = xf86ReturnOptValBool(ms->Options, OPTION_DEBUG_FALLBACK, TRUE); + if (ms->screen) { - ms->exa = xorg_exa_init(pScrn, xf86ReturnOptValBool(ms->Options, - OPTION_2D_ACCEL, TRUE)); - ms->debug_fallback = debug_get_bool_option("XORG_DEBUG_FALLBACK", TRUE); + ms->exa = xorg_exa_init(pScrn, ms->accelerate_2d); xorg_xv_init(pScreen); #ifdef DRI2 @@ -640,6 +684,17 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) #endif } + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "2D Acceleration is %s\n", + ms->screen && ms->accelerate_2d ? 
"enabled" : "disabled"); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Fallback debugging is %s\n", + ms->debug_fallback ? "enabled" : "disabled"); +#ifdef DRI2 + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "3D Acceleration is %s\n", + ms->screen ? "enabled" : "disabled"); +#else + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "3D Acceleration is disabled\n"); +#endif + miInitializeBackingStore(pScreen); xf86SetBackingStore(pScreen); xf86SetSilkenMouse(pScreen); @@ -823,8 +878,7 @@ drv_close_screen(int scrnIndex, ScreenPtr pScreen) drv_close_resource_management(pScrn); - drmClose(ms->fd); - ms->fd = -1; + drv_close_drm(pScrn); pScrn->vtSema = FALSE; pScreen->CloseScreen = ms->CloseScreen; @@ -954,7 +1008,11 @@ drv_create_front_buffer_kms(ScrnInfoPtr pScrn) int ret; attr[0] = KMS_BO_TYPE; +#ifdef KMS_BO_TYPE_SCANOUT_X8R8G8B8 + attr[1] = KMS_BO_TYPE_SCANOUT_X8R8G8B8; +#else attr[1] = KMS_BO_TYPE_SCANOUT; +#endif attr[2] = KMS_WIDTH; attr[3] = pScrn->virtualX; attr[4] = KMS_HEIGHT; @@ -1012,12 +1070,22 @@ drv_bind_front_buffer_kms(ScrnInfoPtr pScrn) goto err_destroy; pScreen->ModifyPixmapHeader(rootPixmap, - pScreen->width, - pScreen->height, + pScrn->virtualX, + pScrn->virtualY, pScreen->rootDepth, pScrn->bitsPerPixel, stride, ptr); + + /* This a hack to work around EnableDisableFBAccess setting the pointer + * the real fix would be to replace pScrn->EnableDisableFBAccess hook + * and set the rootPixmap->devPrivate.ptr to something valid before that. + * + * But in its infinit visdome something uses either this some times before + * that, so our hook doesn't get called before the crash happens. 
+ */ + pScrn->pixmapPrivate.ptr = ptr; + return TRUE; err_destroy: diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index d9432babf18..ae66c4baa91 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -41,9 +41,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" -#include "util/u_format.h" #include "util/u_rect.h" #include "util/u_math.h" #include "util/u_debug.h" @@ -1063,7 +1061,10 @@ xorg_exa_init(ScrnInfoPtr pScrn, Bool accel) } exa->scrn = ms->screen; - exa->pipe = ms->api->create_context(ms->api, exa->scrn); + exa->pipe = exa->scrn->context_create(exa->scrn, NULL); + if (exa->pipe == NULL) + goto out_err; + /* Share context with DRI */ ms->ctx = exa->pipe; diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c index bed17caab77..3e5e6bd6a6e 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c @@ -6,11 +6,9 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "util/u_memory.h" -#include "util/u_simple_shaders.h" #include "tgsi/tgsi_ureg.h" diff --git a/src/gallium/state_trackers/xorg/xorg_output.c b/src/gallium/state_trackers/xorg/xorg_output.c index 251f331ea7a..13c3fb97e3b 100644 --- a/src/gallium/state_trackers/xorg/xorg_output.c +++ b/src/gallium/state_trackers/xorg/xorg_output.c @@ -49,8 +49,6 @@ #include <X11/extensions/dpms.h> #endif -#include "X11/Xatom.h" - #include "xorg_tracker.h" static char *output_enum_list[] = { diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index d80f341e6c2..83b0d31e38d 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ 
b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -5,12 +5,11 @@ #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" -#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_rect.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include <math.h> @@ -379,14 +378,14 @@ struct xorg_renderer * renderer_create(struct pipe_context *pipe) void renderer_destroy(struct xorg_renderer *r) { - struct pipe_constant_buffer *vsbuf = &r->vs_const_buffer; - struct pipe_constant_buffer *fsbuf = &r->fs_const_buffer; + struct pipe_buffer **vsbuf = &r->vs_const_buffer; + struct pipe_buffer **fsbuf = &r->fs_const_buffer; - if (vsbuf && vsbuf->buffer) - pipe_buffer_reference(&vsbuf->buffer, NULL); + if (*vsbuf) + pipe_buffer_reference(vsbuf, NULL); - if (fsbuf && fsbuf->buffer) - pipe_buffer_reference(&fsbuf->buffer, NULL); + if (*fsbuf) + pipe_buffer_reference(fsbuf, NULL); if (r->shaders) { xorg_shaders_destroy(r->shaders); @@ -409,20 +408,20 @@ void renderer_set_constants(struct xorg_renderer *r, const float *params, int param_bytes) { - struct pipe_constant_buffer *cbuf = + struct pipe_buffer **cbuf = (shader_type == PIPE_SHADER_VERTEX) ? 
&r->vs_const_buffer : &r->fs_const_buffer; - pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(r->pipe->screen, 16, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); + pipe_buffer_reference(cbuf, NULL); + *cbuf = pipe_buffer_create(r->pipe->screen, 16, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); - if (cbuf->buffer) { - pipe_buffer_write(r->pipe->screen, cbuf->buffer, + if (*cbuf) { + pipe_buffer_write(r->pipe->screen, *cbuf, 0, param_bytes, params); } - r->pipe->set_constant_buffer(r->pipe, shader_type, 0, cbuf); + r->pipe->set_constant_buffer(r->pipe, shader_type, 0, *cbuf); } @@ -445,11 +444,11 @@ void renderer_copy_prepare(struct xorg_renderer *r, { struct pipe_blend_state blend; memset(&blend, 0, sizeof(blend)); - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.colormask = PIPE_MASK_RGBA; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].colormask = PIPE_MASK_RGBA; cso_set_blend(r->cso, &blend); } diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.h b/src/gallium/state_trackers/xorg/xorg_renderer.h index 5272cde2b3f..af6aa0567d6 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.h +++ b/src/gallium/state_trackers/xorg/xorg_renderer.h @@ -23,8 +23,8 @@ struct xorg_renderer { int fb_width; int fb_height; - struct pipe_constant_buffer vs_const_buffer; - struct pipe_constant_buffer fs_const_buffer; + struct pipe_buffer *vs_const_buffer; + struct pipe_buffer *fs_const_buffer; float buffer[BUF_SIZE]; int buffer_size; diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index 4d5d4780dc4..58bb60a721d 100644 --- 
a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -47,6 +47,8 @@ #endif #include "pipe/p_screen.h" +#include "util/u_inlines.h" +#include "util/u_debug.h" #include "state_tracker/drm_api.h" #define DRV_ERROR(msg) xf86DrvMsg(pScrn->scrnIndex, X_ERROR, msg); @@ -112,6 +114,7 @@ typedef struct _modesettingRec /* exa */ struct exa_context *exa; Bool noEvict; + Bool accelerate_2d; Bool debug_fallback; /* winsys hocks */ diff --git a/src/gallium/state_trackers/xorg/xorg_winsys.h b/src/gallium/state_trackers/xorg/xorg_winsys.h index 47ee4b9ffd8..865733bca2b 100644 --- a/src/gallium/state_trackers/xorg/xorg_winsys.h +++ b/src/gallium/state_trackers/xorg/xorg_winsys.h @@ -45,5 +45,6 @@ void xorg_tracker_set_functions(ScrnInfoPtr scrn); const OptionInfoRec * xorg_tracker_available_options(int chipid, int busid); +Bool xorg_tracker_have_modesetting(ScrnInfoPtr pScrn, struct pci_device *device); #endif diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index 6b5a41a3727..3dcef22c132 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -11,9 +11,6 @@ #include "cso_cache/cso_context.h" #include "pipe/p_screen.h" -#include "pipe/p_inlines.h" - -#include "util/u_format.h" /*XXX get these from pipe's texture limits */ #define IMAGE_MAX_WIDTH 2048 @@ -403,14 +400,14 @@ bind_blend_state(struct xorg_xv_port_priv *port) struct pipe_blend_state blend; memset(&blend, 0, sizeof(struct pipe_blend_state)); - blend.blend_enable = 1; - blend.colormask |= PIPE_MASK_RGBA; + blend.rt[0].blend_enable = 0; + blend.rt[0].colormask = PIPE_MASK_RGBA; /* porter&duff src */ - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = 
PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; cso_set_blend(port->r->cso, &blend); } @@ -486,8 +483,11 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, int dxo, dyo; Bool hdtv; int x, y, w, h; - struct exa_pixmap_priv *dst = exaGetPixmapDriverPrivate(pPixmap); - struct pipe_surface *dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); + struct exa_pixmap_priv *dst; + struct pipe_surface *dst_surf = NULL; + + exaMoveInPixmap(pPixmap); + dst = exaGetPixmapDriverPrivate(pPixmap); if (dst && !dst->tex) { xorg_exa_set_shared_usage(pPixmap); @@ -497,6 +497,7 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, if (!dst || !dst->tex) XORG_FALLBACK("Xv destination %s", !dst ? "!dst" : "!dst->tex"); + dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); hdtv = ((src_w >= RES_720P_X) && (src_h >= RES_720P_Y)); REGION_TRANSLATE(pScrn->pScreen, dstRegion, -pPixmap->screen_x, @@ -516,7 +517,6 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, bind_samplers(pPriv); setup_fs_video_constants(pPriv->r, hdtv); - exaMoveInPixmap(pPixmap); DamageDamageRegion(&pPixmap->drawable, dstRegion); while (nbox--) { diff --git a/src/gallium/winsys/drm/Makefile.egl b/src/gallium/winsys/drm/Makefile.egl new file mode 100644 index 00000000000..b1f20385502 --- /dev/null +++ b/src/gallium/winsys/drm/Makefile.egl @@ -0,0 +1,62 @@ +# src/gallium/winsys/drm/Makefile.egl + +# The driver Makefile should define +# +# EGL_DRIVER_NAME, the name of the driver +# EGL_DRIVER_SOURCES, the sources of the driver +# EGL_DRIVER_LIBS, extra libraries needed by the driver +# EGL_DRIVER_PIPES, the pipe drivers of the driver +# +# before including this file. 
+ +EGL_DRIVER_OBJECTS = $(EGL_DRIVER_SOURCES:.c=.o) + +common_LIBS = -ldrm -lm -ldl + +x11_ST = $(TOP)/src/gallium/state_trackers/egl/libeglx11.a +x11_LIBS = $(common_LIBS) -lX11 -lXext -lXfixes + +kms_ST = $(TOP)/src/gallium/state_trackers/egl/libeglkms.a +kms_LIBS = $(common_LIBS) + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + + +##### TARGETS ##### + +EGL_DISPLAY_DRIVERS = $(foreach dpy, $(EGL_DISPLAYS), egl_$(dpy)_$(EGL_DRIVER_NAME).so) + +EGL_DISPLAY_LIBS = $(foreach drv, $(EGL_DISPLAY_DRIVERS), $(TOP)/$(LIB_DIR)/$(drv)) + +default: $(EGL_DISPLAY_LIBS) + +$(EGL_DISPLAY_LIBS): $(TOP)/$(LIB_DIR)/%.so: %.so + $(INSTALL) $< $(TOP)/$(LIB_DIR) + +define mklib-egl +$(MKLIB) -o $@ -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + $(MKLIB_OPTIONS) $(EGL_DRIVER_OBJECTS) \ + -Wl,--whole-archive $($(1)_ST) -Wl,--no-whole-archive \ + $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $($(1)_LIBS) $(EGL_DRIVER_LIBS) +endef + +egl_x11_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(x11_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile + $(call mklib-egl,x11) + +egl_kms_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(kms_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile + $(call mklib-egl,kms) + +clean: + -rm -f $(EGL_DRIVER_OBJECTS) + -rm -f $(EGL_DISPLAY_DRIVERS) + +install: $(EGL_DISPLAY_LIBS) + $(INSTALL) -d $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR) + for lib in $(EGL_DISPLAY_LIBS); do \ + $(MINSTALL) -m 755 "$$lib" $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR); \ + done + +depend: diff --git a/src/gallium/winsys/drm/Makefile.template b/src/gallium/winsys/drm/Makefile.template index 9635c3c50e9..960353a73d9 100644 --- a/src/gallium/winsys/drm/Makefile.template +++ b/src/gallium/winsys/drm/Makefile.template @@ -82,18 +82,11 @@ SHARED_INCLUDES = \ default: depend symlinks $(TOP)/$(LIB_DIR)/gallium/$(LIBNAME) $(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template - $(MKLIB) -noprefix 
-o $@ \ + $(MKLIB) -o $@ -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \ $(OBJECTS) $(PIPE_DRIVERS) \ -Wl,--start-group $(MESA_MODULES) -Wl,--end-group \ $(WINOBJ) $(DRI_LIB_DEPS) $(DRIVER_EXTRAS) -$(LIBNAME_EGL): $(WINSYS_OBJECTS) $(LIBS) - $(MKLIB) -o $(LIBNAME_EGL) \ - -linker "$(CC)" \ - -noprefix \ - $(OBJECTS) $(MKLIB_OPTIONS) $(WINSYS_OBJECTS) $(PIPE_DRIVERS) $(WINOBJ) $(DRI_LIB_DEPS) \ - --whole-archive $(LIBS) $(GALLIUM_AUXILIARIES) --no-whole-archive $(DRIVER_EXTRAS) - $(TOP)/$(LIB_DIR)/gallium: mkdir -p $@ diff --git a/src/gallium/winsys/drm/i965/egl/Makefile b/src/gallium/winsys/drm/i965/egl/Makefile index a1b32eb2a79..1c132582005 100644 --- a/src/gallium/winsys/drm/i965/egl/Makefile +++ b/src/gallium/winsys/drm/i965/egl/Makefile @@ -1,29 +1,14 @@ TOP = ../../../../../.. -GALLIUMDIR = ../../../.. include $(TOP)/configs/current -LIBNAME = EGL_i965.so +EGL_DRIVER_NAME = i965 +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = -ldrm_intel -PIPE_DRIVERS = \ - $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \ - $(GALLIUMDIR)/winsys/drm/i965/gem/libi965drm.a \ +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/i965/gem/libi965drm.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/i965/libi965.a -DRIVER_SOURCES = - -C_SOURCES = \ - $(COMMON_GALLIUM_SOURCES) \ - $(DRIVER_SOURCES) - -DRIVER_EXTRAS = -ldrm_intel - -ASM_SOURCES = - -DRIVER_DEFINES = -I../gem $(shell pkg-config libdrm --atleast-version=2.3.1 \ - && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") - -include ../../Makefile.template - -symlinks: +include ../../Makefile.egl diff --git a/src/gallium/winsys/drm/i965/egl/dummy.c b/src/gallium/winsys/drm/i965/egl/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/i965/egl/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c 
b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c index fc9678d2b62..a061eef0beb 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c @@ -212,11 +212,6 @@ i965_libdrm_create_screen(struct drm_api *api, int drmFD, return brw_create_screen(&idws->base, deviceID); } -static struct pipe_context * -i965_libdrm_create_context(struct drm_api *api, struct pipe_screen *screen) -{ - return brw_create_context(screen); -} static void destroy(struct drm_api *api) @@ -228,7 +223,7 @@ destroy(struct drm_api *api) struct drm_api i965_libdrm_api = { - .create_context = i965_libdrm_create_context, + .name = "i965", .create_screen = i965_libdrm_create_screen, .texture_from_shared_handle = i965_libdrm_texture_from_shared_handle, .shared_handle_from_texture = i965_libdrm_shared_handle_from_texture, diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c index a4a72b372dd..07be1df87f0 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c @@ -1,6 +1,7 @@ #include "i965_drm_winsys.h" #include "util/u_memory.h" +#include "util/u_inlines.h" #include "i915_drm.h" #include "intel_bufmgr.h" diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index d2b9a1ab311..74501eeb16f 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -469,31 +469,12 @@ fail: } -static struct pipe_context * -xlib_create_i965_context( struct pipe_screen *screen, - void *context_private ) -{ - struct pipe_context *pipe; - - pipe = brw_create_context(screen); - if (pipe == NULL) - goto fail; - - pipe->priv = context_private; - return pipe; - -fail: - /* Free stuff here */ - return NULL; -} - struct xm_driver xlib_i965_driver = { .create_pipe_screen = xlib_create_i965_screen, - .create_pipe_context = xlib_create_i965_context, 
.display_surface = xlib_i965_display_surface }; diff --git a/src/gallium/winsys/drm/i965/xorg/Makefile b/src/gallium/winsys/drm/i965/xorg/Makefile index d91d0006efd..c25726b0bb1 100644 --- a/src/gallium/winsys/drm/i965/xorg/Makefile +++ b/src/gallium/winsys/drm/i965/xorg/Makefile @@ -1,19 +1,25 @@ -TARGET = modesetting_drv.so -CFILES = $(wildcard ./*.c) -OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) TOP = ../../../../../.. -include $(TOP)/configs/current -INCLUDES = \ - $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \ - -I../gem \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include \ - -I$(TOP)/src/egl/main +GALLIUMDIR = $(TOP)/src/gallium + +TARGET = i965g_drv.so + +CFILES = $(wildcard ./*.c) + +include ${TOP}/configs/current + +OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) + +CFLAGS = -DHAVE_CONFIG_H \ + -g -Wall -Wimplicit-function-declaration -fPIC \ + $(shell pkg-config --cflags pixman-1 xorg-server libdrm xproto) \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/drivers \ + -I${GALLIUMDIR}/auxiliary \ + -I${TOP}/src/mesa \ + -I$(TOP)/include \ + -I$(TOP)/src/egl/main LIBS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ @@ -23,20 +29,21 @@ LIBS = \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) -DRIVER_DEFINES = \ - -DHAVE_CONFIG_H - - +TARGET_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET) ############################################# +all default: $(TARGET) $(TARGET_STAGING) - -all default: $(TARGET) - -$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS) +$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a $(LIBS) $(TOP)/bin/mklib -noprefix -o $@ \ $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_intel +$(TOP)/$(LIB_DIR)/gallium: + mkdir -p $@ + +$(TARGET_STAGING): $(TARGET) $(TOP)/$(LIB_DIR)/gallium + $(INSTALL) $(TARGET) 
$(TOP)/$(LIB_DIR)/gallium + clean: rm -rf $(OBJECTS) $(TARGET) @@ -44,14 +51,4 @@ install: $(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) $(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) - -############################################## - - -.c.o: - $(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@ - - -############################################## - .PHONY = all clean install diff --git a/src/gallium/winsys/drm/intel/egl/Makefile b/src/gallium/winsys/drm/intel/egl/Makefile index 1397e9f7290..60d675ca73d 100644 --- a/src/gallium/winsys/drm/intel/egl/Makefile +++ b/src/gallium/winsys/drm/intel/egl/Makefile @@ -1,29 +1,14 @@ TOP = ../../../../../.. -GALLIUMDIR = ../../../.. include $(TOP)/configs/current -LIBNAME = EGL_i915.so +EGL_DRIVER_NAME = i915 +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = -ldrm_intel -PIPE_DRIVERS = \ - $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \ - $(GALLIUMDIR)/winsys/drm/intel/gem/libinteldrm.a \ +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/intel/gem/libinteldrm.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/i915/libi915.a -DRIVER_SOURCES = - -C_SOURCES = \ - $(COMMON_GALLIUM_SOURCES) \ - $(DRIVER_SOURCES) - -DRIVER_EXTRAS = -ldrm_intel - -ASM_SOURCES = - -DRIVER_DEFINES = -I../gem $(shell pkg-config libdrm --atleast-version=2.3.1 \ - && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") - -include ../../Makefile.template - -symlinks: +include ../../Makefile.egl diff --git a/src/gallium/winsys/drm/intel/egl/dummy.c b/src/gallium/winsys/drm/intel/egl/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c index 5ed2a10af1c..377ed255131 100644 --- 
a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c @@ -1,3 +1,4 @@ +#include <stdio.h> #include "state_tracker/drm_api.h" @@ -175,18 +176,11 @@ intel_drm_create_screen(struct drm_api *api, int drmFD, idws->pools.gem = drm_intel_bufmgr_gem_init(idws->fd, idws->max_batch_size); drm_intel_bufmgr_gem_enable_reuse(idws->pools.gem); - idws->softpipe = FALSE; idws->dump_cmd = debug_get_bool_option("INTEL_DUMP_CMD", FALSE); return i915_create_screen(&idws->base, deviceID); } -static struct pipe_context * -intel_drm_create_context(struct drm_api *api, struct pipe_screen *screen) -{ - return i915_create_context(screen); -} - static void destroy(struct drm_api *api) { @@ -195,7 +189,8 @@ destroy(struct drm_api *api) struct drm_api intel_drm_api = { - .create_context = intel_drm_create_context, + .name = "i915", + .driver_name = "i915", .create_screen = intel_drm_create_screen, .texture_from_shared_handle = intel_drm_texture_from_shared_handle, .shared_handle_from_texture = intel_drm_shared_handle_from_texture, diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c b/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c index e8b58742ab7..102faedfeae 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c @@ -1,7 +1,8 @@ #include "intel_drm_winsys.h" #include "util/u_memory.h" -#include "pipe/p_refcnt.h" +#include "util/u_atomic.h" +#include "util/u_inlines.h" /** * Because gem does not have fence's we have to create our own fences. 
diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_winsys.h b/src/gallium/winsys/drm/intel/gem/intel_drm_winsys.h index b4a60563ef4..9786ee93650 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_winsys.h +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_winsys.h @@ -17,7 +17,6 @@ struct intel_drm_winsys { struct intel_winsys base; - boolean softpipe; boolean dump_cmd; int fd; /**< Drm file discriptor */ diff --git a/src/gallium/winsys/drm/nouveau/dri/Makefile b/src/gallium/winsys/drm/nouveau/dri/Makefile index 0937f68c34b..7e95f79d03c 100644 --- a/src/gallium/winsys/drm/nouveau/dri/Makefile +++ b/src/gallium/winsys/drm/nouveau/dri/Makefile @@ -6,9 +6,6 @@ LIBNAME = nouveau_dri.so PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ - $(TOP)/src/gallium/drivers/nv04/libnv04.a \ - $(TOP)/src/gallium/drivers/nv10/libnv10.a \ - $(TOP)/src/gallium/drivers/nv20/libnv20.a \ $(TOP)/src/gallium/drivers/nv30/libnv30.a \ $(TOP)/src/gallium/drivers/nv40/libnv40.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index e5912ef77fe..c814d986b1d 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -2,6 +2,7 @@ #include "pipe/p_state.h" #include "util/u_format.h" #include "util/u_memory.h" +#include "util/u_inlines.h" #include "nouveau_drm_api.h" @@ -54,6 +55,15 @@ static struct dri1_api nouveau_dri1_api = { nouveau_dri1_front_surface, }; +static void +nouveau_drm_destroy_winsys(struct pipe_winsys *s) +{ + struct nouveau_winsys *nv_winsys = nouveau_winsys(s); + struct nouveau_screen *nv_screen= nouveau_screen(nv_winsys->pscreen); + nouveau_device_close(&nv_screen->device); + FREE(nv_winsys); +} + static struct pipe_screen * nouveau_drm_create_screen(struct drm_api *api, int fd, struct drm_create_screen_arg 
*arg) @@ -71,15 +81,6 @@ nouveau_drm_create_screen(struct drm_api *api, int fd, return NULL; switch (dev->chipset & 0xf0) { - case 0x00: - init = nv04_screen_create; - break; - case 0x10: - init = nv10_screen_create; - break; - case 0x20: - init = nv20_screen_create; - break; case 0x30: init = nv30_screen_create; break; @@ -105,6 +106,7 @@ nouveau_drm_create_screen(struct drm_api *api, int fd, return NULL; } ws = &nvws->base; + ws->destroy = nouveau_drm_destroy_winsys; nvws->pscreen = init(ws, dev); if (!nvws->pscreen) { @@ -140,58 +142,6 @@ nouveau_drm_create_screen(struct drm_api *api, int fd, return nvws->pscreen; } -static struct pipe_context * -nouveau_drm_create_context(struct drm_api *api, struct pipe_screen *pscreen) -{ - struct nouveau_winsys *nvws = nouveau_winsys_screen(pscreen); - struct pipe_context *(*init)(struct pipe_screen *, unsigned); - unsigned chipset = nouveau_screen(pscreen)->device->chipset; - int i; - - switch (chipset & 0xf0) { - case 0x00: - init = nv04_create; - break; - case 0x10: - init = nv10_create; - break; - case 0x20: - init = nv20_create; - break; - case 0x30: - init = nv30_create; - break; - case 0x40: - case 0x60: - init = nv40_create; - break; - case 0x50: - case 0x80: - case 0x90: - case 0xa0: - init = nv50_create; - break; - default: - debug_printf("%s: unknown chipset nv%02x\n", __func__, chipset); - return NULL; - } - - /* Find a free slot for a pipe context, allocate a new one if needed */ - for (i = 0; i < nvws->nr_pctx; i++) { - if (nvws->pctx[i] == NULL) - break; - } - - if (i == nvws->nr_pctx) { - nvws->nr_pctx++; - nvws->pctx = realloc(nvws->pctx, - sizeof(*nvws->pctx) * nvws->nr_pctx); - } - - nvws->pctx[i] = init(pscreen, i); - return nvws->pctx[i]; -} - static struct pipe_texture * nouveau_drm_pt_from_name(struct drm_api *api, struct pipe_screen *pscreen, struct pipe_texture *templ, const char *name, @@ -254,8 +204,9 @@ nouveau_drm_handle_from_pt(struct drm_api *api, struct pipe_screen *pscreen, } struct drm_api 
drm_api_hooks = { + .name = "nouveau", + .driver_name = "nouveau", .create_screen = nouveau_drm_create_screen, - .create_context = nouveau_drm_create_context, .texture_from_shared_handle = nouveau_drm_pt_from_name, .shared_handle_from_texture = nouveau_drm_name_from_pt, .local_handle_from_texture = nouveau_drm_handle_from_pt, diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h index e61e0e0957a..a91aad7df8e 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h @@ -4,7 +4,7 @@ #include "state_tracker/drm_api.h" #include "state_tracker/dri1_api.h" -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "nouveau_dri.h" @@ -13,9 +13,6 @@ struct nouveau_winsys { struct pipe_screen *pscreen; - unsigned nr_pctx; - struct pipe_context **pctx; - struct pipe_surface *front; }; diff --git a/src/gallium/winsys/drm/nouveau/egl/Makefile b/src/gallium/winsys/drm/nouveau/egl/Makefile new file mode 100644 index 00000000000..2c352603320 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/egl/Makefile @@ -0,0 +1,16 @@ +TOP = ../../../../../.. 
+include $(TOP)/configs/current + +EGL_DRIVER_NAME = nouveau +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = -ldrm_nouveau + +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ + $(TOP)/src/gallium/drivers/nv30/libnv30.a \ + $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a \ + $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a + +include ../../Makefile.egl diff --git a/src/gallium/winsys/drm/nouveau/egl/dummy.c b/src/gallium/winsys/drm/nouveau/egl/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/egl/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/nouveau/xorg/Makefile b/src/gallium/winsys/drm/nouveau/xorg/Makefile index f0d3b337e83..179b50230b5 100644 --- a/src/gallium/winsys/drm/nouveau/xorg/Makefile +++ b/src/gallium/winsys/drm/nouveau/xorg/Makefile @@ -18,9 +18,6 @@ INCLUDES = \ LIBS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ - $(TOP)/src/gallium/drivers/nv04/libnv04.a \ - $(TOP)/src/gallium/drivers/nv10/libnv10.a \ - $(TOP)/src/gallium/drivers/nv20/libnv20.a \ $(TOP)/src/gallium/drivers/nv30/libnv30.a \ $(TOP)/src/gallium/drivers/nv40/libnv40.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 385fa857b56..3b1c3860a4c 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -33,7 +33,6 @@ #include "radeon_buffer.h" #include "radeon_bo_gem.h" -#include "softpipe/sp_texture.h" #include "r300_context.h" #include "util/u_format.h" #include "util/u_math.h" @@ -51,6 +50,26 @@ static const char *radeon_get_name(struct pipe_winsys *ws) return "Radeon/GEM+KMS"; } +static 
uint32_t radeon_domain_from_usage(unsigned usage) +{ + uint32_t domain = 0; + + if (usage & PIPE_BUFFER_USAGE_GPU_WRITE) { + domain |= RADEON_GEM_DOMAIN_VRAM; + } + if (usage & PIPE_BUFFER_USAGE_PIXEL) { + domain |= RADEON_GEM_DOMAIN_VRAM; + } + if (usage & PIPE_BUFFER_USAGE_VERTEX) { + domain |= RADEON_GEM_DOMAIN_GTT; + } + if (usage & PIPE_BUFFER_USAGE_INDEX) { + domain |= RADEON_GEM_DOMAIN_GTT; + } + + return domain; +} + static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws, unsigned alignment, unsigned usage, @@ -58,6 +77,7 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws, { struct radeon_winsys *radeon_ws = (struct radeon_winsys *)ws; struct radeon_pipe_buffer *radeon_buffer; + struct pb_desc desc; uint32_t domain; radeon_buffer = CALLOC_STRUCT(radeon_pipe_buffer); @@ -70,18 +90,16 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws, radeon_buffer->base.usage = usage; radeon_buffer->base.size = size; - domain = 0; - - if (usage & PIPE_BUFFER_USAGE_PIXEL) { - domain |= RADEON_GEM_DOMAIN_VRAM; - } - if (usage & PIPE_BUFFER_USAGE_VERTEX) { - domain |= RADEON_GEM_DOMAIN_GTT; - } - if (usage & PIPE_BUFFER_USAGE_INDEX) { - domain |= RADEON_GEM_DOMAIN_GTT; + if (usage & PIPE_BUFFER_USAGE_CONSTANT && is_r3xx(radeon_ws->pci_id)) { + /* Don't bother allocating a BO, as it'll never get to the card. 
*/ + desc.alignment = alignment; + desc.usage = usage; + radeon_buffer->pb = pb_malloc_buffer_create(size, &desc); + return &radeon_buffer->base; } + domain = radeon_domain_from_usage(usage); + radeon_buffer->bo = radeon_bo_open(radeon_ws->priv->bom, 0, size, alignment, domain, 0); if (radeon_buffer->bo == NULL) { @@ -133,8 +151,16 @@ static void radeon_buffer_del(struct pipe_buffer *buffer) struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer; - radeon_bo_unref(radeon_buffer->bo); - free(radeon_buffer); + if (radeon_buffer->pb) { + pipe_reference_init(&radeon_buffer->pb->base.reference, 0); + pb_destroy(radeon_buffer->pb); + } + + if (radeon_buffer->bo) { + radeon_bo_unref(radeon_buffer->bo); + } + + FREE(radeon_buffer); } static void *radeon_buffer_map(struct pipe_winsys *ws, @@ -146,6 +172,10 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, (struct radeon_pipe_buffer*)buffer; int write = 0; + if (radeon_buffer->pb) { + return pb_map(radeon_buffer->pb, flags); + } + if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { uint32_t domain; @@ -174,7 +204,31 @@ static void radeon_buffer_unmap(struct pipe_winsys *ws, struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer; - radeon_bo_unmap(radeon_buffer->bo); + if (radeon_buffer->pb) { + pb_unmap(radeon_buffer->pb); + } else { + radeon_bo_unmap(radeon_buffer->bo); + } +} + +static void radeon_buffer_set_tiling(struct radeon_winsys *ws, + struct pipe_buffer *buffer, + uint32_t pitch, + boolean microtiled, + boolean macrotiled) +{ + struct radeon_pipe_buffer *radeon_buffer = + (struct radeon_pipe_buffer*)buffer; + uint32_t flags = 0; + + if (microtiled) { + flags |= RADEON_BO_FLAGS_MICRO_TILE; + } + if (macrotiled) { + flags |= RADEON_BO_FLAGS_MACRO_TILE; + } + + radeon_bo_set_tiling(radeon_buffer->bo, flags, pitch); } static void radeon_fence_reference(struct pipe_winsys *ws, @@ -197,55 +251,6 @@ static int radeon_fence_finish(struct pipe_winsys *ws, return 0; } -static void 
radeon_display_surface(struct pipe_winsys *pws, - struct pipe_surface *psurf, - struct radeon_vl_context *rvl_ctx) -{ - struct r300_texture *r300tex = (struct r300_texture *)(psurf->texture); - XImage *ximage; - void *data; - - ximage = XCreateImage(rvl_ctx->display, - XDefaultVisual(rvl_ctx->display, rvl_ctx->screen), - XDefaultDepth(rvl_ctx->display, rvl_ctx->screen), - ZPixmap, 0, /* format, offset */ - NULL, /* data */ - 0, 0, /* size */ - 32, /* bitmap_pad */ - 0); /* bytes_per_line */ - - assert(ximage->format); - assert(ximage->bitmap_unit); - - data = pws->buffer_map(pws, r300tex->buffer, 0); - - /* update XImage's fields */ - ximage->data = data; - ximage->width = psurf->width; - ximage->height = psurf->height; - ximage->bytes_per_line = psurf->width * (ximage->bits_per_pixel >> 3); - - XPutImage(rvl_ctx->display, rvl_ctx->drawable, - XDefaultGC(rvl_ctx->display, rvl_ctx->screen), - ximage, 0, 0, 0, 0, psurf->width, psurf->height); - - XSync(rvl_ctx->display, 0); - - ximage->data = NULL; - XDestroyImage(ximage); - - pws->buffer_unmap(pws, r300tex->buffer); -} - -static void radeon_flush_frontbuffer(struct pipe_winsys *pipe_winsys, - struct pipe_surface *pipe_surface, - void *context_private) -{ - struct radeon_vl_context *rvl_ctx; - rvl_ctx = (struct radeon_vl_context *) context_private; - radeon_display_surface(pipe_winsys, pipe_surface, rvl_ctx); -} - struct radeon_winsys* radeon_pipe_winsys(int fd) { struct radeon_winsys* radeon_ws; @@ -264,7 +269,7 @@ struct radeon_winsys* radeon_pipe_winsys(int fd) radeon_ws->priv->fd = fd; radeon_ws->priv->bom = radeon_bo_manager_gem_ctor(fd); - radeon_ws->base.flush_frontbuffer = radeon_flush_frontbuffer; + radeon_ws->base.flush_frontbuffer = NULL; /* overriden by co-state tracker */ radeon_ws->base.buffer_create = radeon_buffer_create; radeon_ws->base.user_buffer_create = radeon_buffer_user_create; @@ -279,5 +284,7 @@ struct radeon_winsys* radeon_pipe_winsys(int fd) radeon_ws->base.get_name = radeon_get_name; + 
radeon_ws->buffer_set_tiling = radeon_buffer_set_tiling; + return radeon_ws; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h index d7f17564a9f..f1c8fc2a3b1 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h @@ -32,11 +32,11 @@ #include <stdio.h> -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_defines.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" -//#include "state_tracker/st_public.h" +#include "pipebuffer/pb_buffer.h" #include "util/u_memory.h" @@ -49,7 +49,10 @@ struct radeon_pipe_buffer { struct pipe_buffer base; + /* Pointer to GPU-backed BO. */ struct radeon_bo *bo; + /* Pointer to fallback PB buffer. */ + struct pb_buffer *pb; boolean flinked; uint32_t flink; }; diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 05194fc52a2..0c0e118ba3a 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -29,8 +29,6 @@ * Joakim Sindholt <[email protected]> */ -#include "softpipe/sp_winsys.h" - #include "radeon_drm.h" /* Helper function to do the ioctls needed for setup and init. */ @@ -40,12 +38,16 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys) struct drm_radeon_info info = {0}; int target = 0; int retval; + drmVersionPtr version; info.value = (unsigned long)&target; /* We do things in a specific order here. * - * First, the PCI ID. This is essential and should return usable numbers + * DRM version first. We need to be sure we're running on a KMS chipset. + * This is also for some features. + * + * Then, the PCI ID. This is essential and should return usable numbers * for all Radeons. If this fails, we probably got handed an FD for some * non-Radeon card. 
* @@ -55,8 +57,18 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys) * * The GEM info is actually bogus on the kernel side, as well as our side * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because - * we don't actually use the info for anything yet. - * XXX update the above when we can safely use vram_size instead of vram_visible */ + * we don't actually use the info for anything yet. */ + + version = drmGetVersion(fd); + if (version->version_major != 2) { + fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " + "only compatible with 2.x.x\n", __FUNCTION__, + version->version_major, version->version_minor, + version->version_patchlevel); + drmFreeVersion(version); + exit(1); + } + info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); if (retval) { @@ -92,16 +104,18 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys) exit(1); } winsys->gart_size = gem_info.gart_size; - /* XXX */ - winsys->vram_size = gem_info.vram_visible; -} - -/* Guess at whether this chipset should use r300g. - * - * I believe that this check is valid, but I haven't been exhaustive. */ -static boolean is_r3xx(int pciid) -{ - return (pciid > 0x3150) && (pciid < 0x796f); + winsys->vram_size = gem_info.vram_size; + + debug_printf("radeon: Successfully grabbed chipset info from kernel!\n" + "radeon: DRM version: %d.%d.%d ID: 0x%04x GB: %d Z: %d\n" + "radeon: GART size: %d MB VRAM size: %d MB\n", + version->version_major, version->version_minor, + version->version_patchlevel, winsys->pci_id, + winsys->gb_pipes, winsys->z_pipes, + winsys->gart_size / 1024 / 1024, + winsys->vram_size / 1024 / 1024); + + drmFreeVersion(version); } /* Create a pipe_screen. 
*/ @@ -112,36 +126,27 @@ struct pipe_screen* radeon_create_screen(struct drm_api* api, struct radeon_winsys* rwinsys = radeon_pipe_winsys(drmFB); do_ioctls(drmFB, rwinsys); - if (!is_r3xx(rwinsys->pci_id) || - debug_get_bool_option("RADEON_SOFTPIPE", FALSE)) { - return softpipe_create_screen((struct pipe_winsys*)rwinsys); - } else { + /* The state tracker can organize a softpipe fallback if no hw + * driver is found. + */ + if (is_r3xx(rwinsys->pci_id)) { radeon_setup_winsys(drmFB, rwinsys); return r300_create_screen(rwinsys); - } -} - -/* Create a pipe_context. */ -struct pipe_context* radeon_create_context(struct drm_api* api, - struct pipe_screen* screen) -{ - struct radeon_winsys* rwinsys = (struct radeon_winsys*)screen->winsys; - - if (!is_r3xx(rwinsys->pci_id) || - debug_get_bool_option("RADEON_SOFTPIPE", FALSE)) { - return softpipe_create(screen); } else { - return r300_create_context(screen, rwinsys); + FREE(rwinsys); + return NULL; } } + boolean radeon_buffer_from_texture(struct drm_api* api, + struct pipe_screen* screen, struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride) { /* XXX fix this */ - return r300_get_texture_buffer(texture, buffer, stride); + return r300_get_texture_buffer(screen, texture, buffer, stride); } /* Create a buffer from a handle. 
*/ @@ -208,7 +213,7 @@ static boolean radeon_shared_handle_from_texture(struct drm_api *api, struct radeon_pipe_buffer* radeon_buffer; struct pipe_buffer *buffer = NULL; - if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { + if (!radeon_buffer_from_texture(api, screen, texture, &buffer, stride)) { return FALSE; } @@ -240,7 +245,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api, unsigned *handle) { struct pipe_buffer *buffer = NULL; - if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { + if (!radeon_buffer_from_texture(api, screen, texture, &buffer, stride)) { return FALSE; } @@ -251,12 +256,19 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api, return TRUE; } +static void radeon_drm_api_destroy(struct drm_api *api) +{ + return; +} + struct drm_api drm_api_hooks = { + .name = "radeon", + .driver_name = "radeon", .create_screen = radeon_create_screen, - .create_context = radeon_create_context, .texture_from_shared_handle = radeon_texture_from_shared_handle, .shared_handle_from_texture = radeon_shared_handle_from_texture, .local_handle_from_texture = radeon_local_handle_from_texture, + .destroy = radeon_drm_api_destroy, }; struct drm_api* drm_api_create() diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h index bf0e78138d7..8d74cbafc2f 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h @@ -52,10 +52,9 @@ struct pipe_screen* radeon_create_screen(struct drm_api* api, int drmFB, struct drm_create_screen_arg *arg); -struct pipe_context* radeon_create_context(struct drm_api* api, - struct pipe_screen* screen); boolean radeon_buffer_from_texture(struct drm_api* api, + struct pipe_screen* screen, struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); @@ -76,4 +75,13 @@ boolean radeon_global_handle_from_buffer(struct drm_api* api, unsigned* handle); void 
radeon_destroy_drm_api(struct drm_api* api); + +/* Guess at whether this chipset should use r300g. + * + * I believe that this check is valid, but I haven't been exhaustive. */ +static INLINE boolean is_r3xx(int pciid) +{ + return (pciid > 0x3150) && (pciid < 0x796f); +} + #endif diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index 0875ee41cbf..d759beaba13 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -81,9 +81,13 @@ static void radeon_write_cs_reloc(struct radeon_winsys* winsys, uint32_t flags) { int retval = 0; + struct radeon_pipe_buffer* radeon_buffer = + (struct radeon_pipe_buffer*)pbuffer; - retval = radeon_cs_write_reloc(winsys->priv->cs, - ((struct radeon_pipe_buffer*)pbuffer)->bo, rd, wd, flags); + assert(!radeon_buffer->pb); + + retval = radeon_cs_write_reloc(winsys->priv->cs, radeon_buffer->bo, + rd, wd, flags); if (retval) { debug_printf("radeon: Relocation of %p (%d, %d, %d) failed!\n", @@ -108,6 +112,11 @@ static void radeon_flush_cs(struct radeon_winsys* winsys) { int retval; + /* Don't flush a zero-sized CS. */ + if (!winsys->priv->cs->cdw) { + return; + } + /* Emit the CS. 
*/ retval = radeon_cs_emit(winsys->priv->cs); if (retval) { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h index 9edc9e038c3..4901080ca7b 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h @@ -30,7 +30,7 @@ #ifndef RADEON_WINSYS_H #define RADEON_WINSYS_H -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" struct radeon_winsys_priv; @@ -100,6 +100,12 @@ struct radeon_winsys { void (*flush_cb)(void *), void *data); void (*reset_bos)(struct radeon_winsys *winsys); + + void (*buffer_set_tiling)(struct radeon_winsys* winsys, + struct pipe_buffer* buffer, + uint32_t pitch, + boolean microtiled, + boolean macrotiled); }; #endif diff --git a/src/gallium/winsys/drm/radeon/dri/Makefile b/src/gallium/winsys/drm/radeon/dri/Makefile index a9889444de8..eaa34180321 100644 --- a/src/gallium/winsys/drm/radeon/dri/Makefile +++ b/src/gallium/winsys/drm/radeon/dri/Makefile @@ -2,7 +2,7 @@ TOP = ../../../../../.. include $(TOP)/configs/current -LIBNAME = radeon_dri.so +LIBNAME = radeong_dri.so MINIGLX_SOURCES = diff --git a/src/gallium/winsys/drm/radeon/egl/Makefile b/src/gallium/winsys/drm/radeon/egl/Makefile index 6a1448d1b9b..cd4f9b20f06 100644 --- a/src/gallium/winsys/drm/radeon/egl/Makefile +++ b/src/gallium/winsys/drm/radeon/egl/Makefile @@ -1,26 +1,14 @@ TOP = ../../../../../.. -GALLIUMDIR = ../../../.. 
include $(TOP)/configs/current -LIBNAME = EGL_r300.so +EGL_DRIVER_NAME = radeon +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = -ldrm_radeon -PIPE_DRIVERS = \ - $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \ - $(GALLIUMDIR)/winsys/drm/radeon/core/libradeonwinsys.a \ +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/radeon/core/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/r300/libr300.a -DRIVER_SOURCES = - -C_SOURCES = \ - $(COMMON_GALLIUM_SOURCES) \ - $(DRIVER_SOURCES) - -DRIVER_EXTRAS = -ldrm_radeon - -ASM_SOURCES = - -include ../../Makefile.template - -symlinks: +include ../../Makefile.egl diff --git a/src/gallium/winsys/drm/radeon/egl/dummy.c b/src/gallium/winsys/drm/radeon/egl/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/radeon/egl/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/radeon/python/radeon_hardpipe_winsys.c b/src/gallium/winsys/drm/radeon/python/radeon_hardpipe_winsys.c index c3ec24aaf78..fc63081a4cc 100644 --- a/src/gallium/winsys/drm/radeon/python/radeon_hardpipe_winsys.c +++ b/src/gallium/winsys/drm/radeon/python/radeon_hardpipe_winsys.c @@ -124,17 +124,9 @@ error: } -static struct pipe_context * -radeon_hardpipe_context_create(struct pipe_screen *screen) -{ - /* FIXME: create a radon pipe_context from screen */ - - return NULL; -} const struct st_winsys st_hardpipe_winsys = { &radeon_hardpipe_screen_create, - &radeon_hardpipe_context_create, }; diff --git a/src/gallium/winsys/drm/swrast/Makefile b/src/gallium/winsys/drm/swrast/Makefile new file mode 100644 index 00000000000..363b89584f2 --- /dev/null +++ b/src/gallium/winsys/drm/swrast/Makefile @@ -0,0 +1,12 @@ +# src/gallium/winsys/drm/swrast/Makefile +TOP = ../../../../.. 
+include $(TOP)/configs/current + +SUBDIRS = core $(GALLIUM_STATE_TRACKERS_DIRS) + +default install clean: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) $@) || exit 1; \ + fi \ + done diff --git a/src/gallium/winsys/drm/swrast/core/Makefile b/src/gallium/winsys/drm/swrast/core/Makefile new file mode 100644 index 00000000000..93931ae22b9 --- /dev/null +++ b/src/gallium/winsys/drm/swrast/core/Makefile @@ -0,0 +1,10 @@ +# src/gallium/winsys/drm/swrast/core/Makefile + +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = swrastdrm + +C_SOURCES = swrast_drm_api.c + +include ../../../../Makefile.template diff --git a/src/gallium/winsys/drm/swrast/core/swrast_drm_api.c b/src/gallium/winsys/drm/swrast/core/swrast_drm_api.c new file mode 100644 index 00000000000..8c9f80e2c15 --- /dev/null +++ b/src/gallium/winsys/drm/swrast/core/swrast_drm_api.c @@ -0,0 +1,13 @@ +#include "state_tracker/drm_api.h" + +static struct drm_api swrast_drm_api = +{ + .name = "swrast", +}; + +struct drm_api * +drm_api_create() +{ + (void) swrast_drm_api; + return NULL; +} diff --git a/src/gallium/winsys/drm/swrast/egl/Makefile b/src/gallium/winsys/drm/swrast/egl/Makefile new file mode 100644 index 00000000000..26fe2d2805a --- /dev/null +++ b/src/gallium/winsys/drm/swrast/egl/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../../../.. 
+include $(TOP)/configs/current + +EGL_DRIVER_NAME = swrast +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = + +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/swrast/core/libswrastdrm.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a + +include ../../Makefile.egl diff --git a/src/gallium/winsys/drm/swrast/egl/dummy.c b/src/gallium/winsys/drm/swrast/egl/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/swrast/egl/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/vmware/core/vmw_buffer.c b/src/gallium/winsys/drm/vmware/core/vmw_buffer.c index b812fb59d39..eca174a6c56 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_buffer.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_buffer.c @@ -41,7 +41,7 @@ #include "svga_cmd.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" #include "pipebuffer/pb_buffer.h" #include "pipebuffer/pb_bufmgr.h" diff --git a/src/gallium/winsys/drm/vmware/core/vmw_buffer.h b/src/gallium/winsys/drm/vmware/core/vmw_buffer.h index 634bdcabd26..41fb4476da5 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_buffer.h +++ b/src/gallium/winsys/drm/vmware/core/vmw_buffer.h @@ -27,7 +27,7 @@ #ifndef VMW_BUFFER_H_ #define VMW_BUFFER_H_ - +#include <assert.h> #include "pipe/p_compiler.h" struct SVGAGuestPtr; diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.c b/src/gallium/winsys/drm/vmware/core/vmw_context.c index b6997588de4..90ffc4868f7 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_context.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_context.c @@ -41,9 +41,18 @@ #define VMW_COMMAND_SIZE (64*1024) #define VMW_SURFACE_RELOCS (1024) +#define VMW_REGION_RELOCS (512) #define VMW_MUST_FLUSH_STACK 8 +struct vmw_region_relocation +{ + struct SVGAGuestPtr *where; + struct pb_buffer *buffer; + /* TODO: put offset info inside where */ + uint32 offset; +}; + struct vmw_svga_winsys_context { 
struct svga_winsys_context base; @@ -69,10 +78,31 @@ struct vmw_svga_winsys_context uint32_t staged; uint32_t reserved; } surface; + + struct { + struct vmw_region_relocation relocs[VMW_REGION_RELOCS]; + uint32_t size; + uint32_t used; + uint32_t staged; + uint32_t reserved; + } region; struct pb_validate *validate; uint32_t last_fence; + + /** + * The amount of GMR that is referred by the commands currently batched + * in the context. + */ + uint32_t seen_regions; + + /** + * Whether this context should fail to reserve more commands, not because it + * ran out of command space, but because a substantial amount of GMR was + * referred. + */ + boolean preemptive_flush; }; @@ -96,6 +126,19 @@ vmw_swc_flush(struct svga_winsys_context *swc, ret = pb_validate_validate(vswc->validate); assert(ret == PIPE_OK); if(ret == PIPE_OK) { + + /* Apply relocations */ + for(i = 0; i < vswc->region.used; ++i) { + struct vmw_region_relocation *reloc = &vswc->region.relocs[i]; + struct SVGAGuestPtr ptr; + + if(!vmw_gmr_bufmgr_region_ptr(reloc->buffer, &ptr)) + assert(0); + + ptr.offset += reloc->offset; + + *reloc->where = ptr; + } if (vswc->command.used) vmw_ioctl_command(vswc->vws, @@ -121,9 +164,18 @@ vmw_swc_flush(struct svga_winsys_context *swc, vswc->surface.used = 0; vswc->surface.reserved = 0; + for(i = 0; i < vswc->region.used + vswc->region.staged; ++i) { + pb_reference(&vswc->region.relocs[i].buffer, NULL); + } + + vswc->region.used = 0; + vswc->region.reserved = 0; + #ifdef DEBUG vswc->must_flush = FALSE; #endif + vswc->preemptive_flush = FALSE; + vswc->seen_regions = 0; if(pfence) *pfence = fence; @@ -151,8 +203,10 @@ vmw_swc_reserve(struct svga_winsys_context *swc, if(nr_bytes > vswc->command.size) return NULL; - if(vswc->command.used + nr_bytes > vswc->command.size || - vswc->surface.used + nr_relocs > vswc->surface.size) { + if(vswc->preemptive_flush || + vswc->command.used + nr_bytes > vswc->command.size || + vswc->surface.used + nr_relocs > vswc->surface.size || + 
vswc->region.used + nr_relocs > vswc->region.size) { #ifdef DEBUG vswc->must_flush = TRUE; debug_backtrace_capture(vswc->must_flush_stack, 1, @@ -163,11 +217,14 @@ vmw_swc_reserve(struct svga_winsys_context *swc, assert(vswc->command.used + nr_bytes <= vswc->command.size); assert(vswc->surface.used + nr_relocs <= vswc->surface.size); - + assert(vswc->region.used + nr_relocs <= vswc->region.size); + vswc->command.reserved = nr_bytes; vswc->surface.reserved = nr_relocs; vswc->surface.staged = 0; - + vswc->region.reserved = nr_relocs; + vswc->region.staged = 0; + return vswc->command.buffer + vswc->command.used; } @@ -206,20 +263,41 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc, unsigned flags) { struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); - struct SVGAGuestPtr ptr; - struct pb_buffer *buf = vmw_pb_buffer(buffer); + struct vmw_region_relocation *reloc; enum pipe_error ret; + + assert(vswc->region.staged < vswc->region.reserved); - if(!vmw_gmr_bufmgr_region_ptr(buf, &ptr)) - assert(0); - - ptr.offset += offset; + reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged]; + reloc->where = where; + pb_reference(&reloc->buffer, vmw_pb_buffer(buffer)); + reloc->offset = offset; - *where = ptr; + ++vswc->region.staged; - ret = pb_validate_add_buffer(vswc->validate, buf, flags); + ret = pb_validate_add_buffer(vswc->validate, reloc->buffer, flags); /* TODO: Update pipebuffer to reserve buffers and not fail here */ assert(ret == PIPE_OK); + + /* + * Flush preemptively the FIFO commands to keep the GMR working set within + * the GMR pool size. + * + * This is necessary for applications like SPECviewperf that generate huge + * amounts of immediate vertex data, so that we don't pile up too much of + * that vertex data neither in the guest nor in the host. + * + * Note that in the current implementation if a region is referred twice in + * a command stream, it will be accounted twice. 
We could detect repeated + * regions and count only once, but there is no incentive to do that, since + * regions are typically short-lived; always referred in a single command; + * and at the worst we just flush the commands a bit sooner, which for the + * SVGA virtual device it's not a performance issue since flushing commands + * to the FIFO won't cause flushing in the host. + */ + vswc->seen_regions += reloc->buffer->base.size; + if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/2) + vswc->preemptive_flush = TRUE; } @@ -238,6 +316,12 @@ vmw_swc_commit(struct svga_winsys_context *swc) vswc->surface.used += vswc->surface.staged; vswc->surface.staged = 0; vswc->surface.reserved = 0; + + assert(vswc->region.staged <= vswc->region.reserved); + assert(vswc->region.used + vswc->region.staged <= vswc->region.size); + vswc->region.used += vswc->region.staged; + vswc->region.staged = 0; + vswc->region.reserved = 0; } @@ -246,6 +330,11 @@ vmw_swc_destroy(struct svga_winsys_context *swc) { struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); unsigned i; + + for(i = 0; i < vswc->region.used; ++i) { + pb_reference(&vswc->region.relocs[i].buffer, NULL); + } + for(i = 0; i < vswc->surface.used; ++i) { p_atomic_dec(&vswc->surface.handles[i]->validated); vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL); @@ -279,6 +368,7 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws) vswc->command.size = VMW_COMMAND_SIZE; vswc->surface.size = VMW_SURFACE_RELOCS; + vswc->region.size = VMW_REGION_RELOCS; vswc->validate = pb_validate_create(); if(!vswc->validate) { @@ -290,8 +380,3 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws) } -struct pipe_context * -vmw_svga_context_create(struct pipe_screen *screen) -{ - return svga_context_create(screen); -} diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.h b/src/gallium/winsys/drm/vmware/core/vmw_context.h index 305ce9b5bec..d4884d24e99 100644 --- 
a/src/gallium/winsys/drm/vmware/core/vmw_context.h +++ b/src/gallium/winsys/drm/vmware/core/vmw_context.h @@ -52,8 +52,5 @@ struct pipe_screen; struct svga_winsys_context * vmw_svga_winsys_context_create(struct svga_winsys_screen *sws); -struct pipe_context * -vmw_svga_context_create(struct pipe_screen *screen); - #endif /* VMW_CONTEXT_H_ */ diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.c b/src/gallium/winsys/drm/vmware/core/vmw_screen.c index 911eec5e254..6cc9b382932 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.c @@ -37,13 +37,16 @@ * module. */ struct vmw_winsys_screen * -vmw_winsys_create( int fd ) +vmw_winsys_create( int fd, boolean use_old_scanout_flag ) { struct vmw_winsys_screen *vws = CALLOC_STRUCT(vmw_winsys_screen); if (!vws) goto out_no_vws; vws->ioctl.drm_fd = fd; + vws->use_old_scanout_flag = use_old_scanout_flag; + debug_printf("%s: use_old_scanout_flag == %s\n", __FUNCTION__, + use_old_scanout_flag ? 
"true" : "false"); if (!vmw_ioctl_init(vws)) goto out_no_ioctl; diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.h b/src/gallium/winsys/drm/vmware/core/vmw_screen.h index a875107370c..d3f2c2c7f56 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen.h +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.h @@ -40,6 +40,10 @@ #include "svga_winsys.h" + +#define VMW_GMR_POOL_SIZE (16*1024*1024) + + struct pb_manager; struct vmw_region; @@ -48,6 +52,8 @@ struct vmw_winsys_screen { struct svga_winsys_screen base; + boolean use_old_scanout_flag; + struct { volatile uint32_t *fifo_map; uint64_t last_fence; @@ -127,7 +133,7 @@ boolean vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws); void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws); void vmw_pools_cleanup(struct vmw_winsys_screen *vws); -struct vmw_winsys_screen *vmw_winsys_create(int fd); +struct vmw_winsys_screen *vmw_winsys_create(int fd, boolean use_old_scanout_flag); void vmw_winsys_destroy(struct vmw_winsys_screen *sws); diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c index 5995eee34ba..1dcbc419dbb 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c @@ -25,8 +25,9 @@ #include "pipe/p_compiler.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_format.h" #include "vmw_screen.h" #include "trace/tr_drm.h" @@ -49,7 +50,8 @@ static struct dri1_api_version ddx_compat = { 0, 0, 0 }; static struct dri1_api_version dri_required = { 4, 0, 0 }; static struct dri1_api_version dri_compat = { 4, 0, 0 }; static struct dri1_api_version drm_required = { 0, 1, 0 }; -static struct dri1_api_version drm_compat = { 0, 0, 0 }; +static struct dri1_api_version drm_compat = { 1, 0, 0 }; +static struct dri1_api_version drm_scanout = { 0, 9, 0 }; static boolean vmw_dri1_check_version(const struct dri1_api_version *cur, @@ 
-84,6 +86,29 @@ vmw_drm_create_screen(struct drm_api *drm_api, struct vmw_winsys_screen *vws; struct pipe_screen *screen; struct dri1_create_screen_arg *dri1; + boolean use_old_scanout_flag = FALSE; + + if (!arg || arg->mode == DRM_CREATE_NORMAL) { + struct dri1_api_version drm_ver; + drmVersionPtr ver; + + ver = drmGetVersion(fd); + if (ver == NULL) + return NULL; + + drm_ver.major = ver->version_major; + drm_ver.minor = ver->version_minor; + drm_ver.patch_level = 0; /* ??? */ + + drmFreeVersion(ver); + if (!vmw_dri1_check_version(&drm_ver, &drm_required, + &drm_compat, "vmwgfx drm driver")) + return NULL; + + if (!vmw_dri1_check_version(&drm_ver, &drm_scanout, + &drm_compat, "use old scanout field (not a error)")) + use_old_scanout_flag = TRUE; + } if (arg != NULL) { switch (arg->mode) { @@ -100,6 +125,9 @@ vmw_drm_create_screen(struct drm_api *drm_api, if (!vmw_dri1_check_version(&dri1->drm_version, &drm_required, &drm_compat, "vmwgfx drm driver")) return NULL; + if (!vmw_dri1_check_version(&dri1->drm_version, &drm_scanout, + &drm_compat, "use old scanout field (not a error)")) + use_old_scanout_flag = TRUE; dri1->api = &dri1_api_hooks; break; default: @@ -107,7 +135,7 @@ vmw_drm_create_screen(struct drm_api *drm_api, } } - vws = vmw_winsys_create( fd ); + vws = vmw_winsys_create( fd, use_old_scanout_flag ); if (!vws) goto out_no_vws; @@ -220,22 +248,19 @@ vmw_dri1_present_locked(struct pipe_context *locked_pipe, vmw_svga_winsys_surface_reference(&vsrf, NULL); } -/** - * FIXME: We'd probably want to cache these buffers in the - * screen, based on handle. 
- */ - -static struct pipe_buffer * -vmw_drm_buffer_from_handle(struct drm_api *drm_api, - struct pipe_screen *screen, - const char *name, - unsigned handle) +static struct pipe_texture * +vmw_drm_texture_from_handle(struct drm_api *drm_api, + struct pipe_screen *screen, + struct pipe_texture *templat, + const char *name, + unsigned stride, + unsigned handle) { struct vmw_svga_winsys_surface *vsrf; struct svga_winsys_surface *ssrf; struct vmw_winsys_screen *vws = vmw_winsys_screen(svga_winsys_screen(screen)); - struct pipe_buffer *buf; + struct pipe_texture *tex; union drm_vmw_surface_reference_arg arg; struct drm_vmw_surface_arg *req = &arg.req; struct drm_vmw_surface_create_req *rep = &arg.rep; @@ -282,43 +307,28 @@ vmw_drm_buffer_from_handle(struct drm_api *drm_api, pipe_reference_init(&vsrf->refcnt, 1); p_atomic_set(&vsrf->validated, 0); + vsrf->screen = vws; vsrf->sid = handle; ssrf = svga_winsys_surface(vsrf); - buf = svga_screen_buffer_wrap_surface(screen, rep->format, ssrf); - if (!buf) + tex = svga_screen_texture_wrap_surface(screen, templat, rep->format, ssrf); + if (!tex) vmw_svga_winsys_surface_reference(&vsrf, NULL); - return buf; + return tex; out_mip: vmw_ioctl_surface_destroy(vws, handle); return NULL; } -static struct pipe_texture * -vmw_drm_texture_from_handle(struct drm_api *drm_api, - struct pipe_screen *screen, - struct pipe_texture *templat, - const char *name, - unsigned stride, - unsigned handle) -{ - struct pipe_buffer *buffer; - buffer = vmw_drm_buffer_from_handle(drm_api, screen, name, handle); - - if (!buffer) - return NULL; - - return screen->texture_blanket(screen, templat, &stride, buffer); -} - static boolean -vmw_drm_handle_from_buffer(struct drm_api *drm_api, +vmw_drm_handle_from_texture(struct drm_api *drm_api, struct pipe_screen *screen, - struct pipe_buffer *buffer, + struct pipe_texture *texture, + unsigned *stride, unsigned *handle) { struct svga_winsys_surface *surface = - svga_screen_buffer_get_winsys_surface(buffer); + 
svga_screen_texture_get_winsys_surface(texture); struct vmw_svga_winsys_surface *vsrf; if (!surface) @@ -326,31 +336,13 @@ vmw_drm_handle_from_buffer(struct drm_api *drm_api, vsrf = vmw_svga_winsys_surface(surface); *handle = vsrf->sid; + *stride = util_format_get_nblocksx(texture->format, texture->width0) * + util_format_get_blocksize(texture->format); + vmw_svga_winsys_surface_reference(&vsrf, NULL); return TRUE; } -static boolean -vmw_drm_handle_from_texture(struct drm_api *drm_api, - struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned *stride, - unsigned *handle) -{ - struct pipe_buffer *buffer; - - if (!svga_screen_buffer_from_texture(texture, &buffer, stride)) - return FALSE; - - return vmw_drm_handle_from_buffer(drm_api, screen, buffer, handle); -} - -static struct pipe_context* -vmw_drm_create_context(struct drm_api *drm_api, - struct pipe_screen *screen) -{ - return vmw_svga_context_create(screen); -} static struct dri1_api dri1_api_hooks = { .front_srf_locked = NULL, @@ -358,8 +350,9 @@ static struct dri1_api dri1_api_hooks = { }; static struct drm_api vmw_drm_api_hooks = { + .name = "vmwgfx", + .driver_name = "vmwgfx", .create_screen = vmw_drm_create_screen, - .create_context = vmw_drm_create_context, .texture_from_shared_handle = vmw_drm_texture_from_handle, .shared_handle_from_texture = vmw_drm_handle_from_texture, .local_handle_from_texture = vmw_drm_handle_from_texture, diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c index ccd0b418a16..5d81fa8c4a6 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c @@ -57,6 +57,12 @@ struct vmw_region uint32_t size; }; +/* XXX: This isn't a real hardware flag, but just a hack for kernel to + * know about primary surfaces. In newer versions of the kernel + * interface the driver uses a special field. 
+ */ +#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) + static void vmw_check_last_cmd(struct vmw_winsys_screen *vws) { @@ -169,7 +175,17 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws, vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format); memset(&s_arg, 0, sizeof(s_arg)); - req->flags = (uint32_t) flags; + if (vws->use_old_scanout_flag && + (flags & SVGA3D_SURFACE_HINT_SCANOUT)) { + req->flags = (uint32_t) flags; + req->scanout = false; + } else if (flags & SVGA3D_SURFACE_HINT_SCANOUT) { + req->flags = (uint32_t) (flags & ~SVGA3D_SURFACE_HINT_SCANOUT); + req->scanout = true; + } else { + req->flags = (uint32_t) flags; + req->scanout = false; + } req->format = (uint32_t) format; req->shareable = 1; diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c index b1c24b0cb6a..b9823d78575 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c @@ -53,14 +53,32 @@ vmw_pools_init(struct vmw_winsys_screen *vws) goto error; vws->pools.gmr_mm = mm_bufmgr_create(vws->pools.gmr, - 16*1024*1024, + VMW_GMR_POOL_SIZE, 12 /* 4096 alignment */); if(!vws->pools.gmr_mm) goto error; + /* + * GMR buffers are typically shortlived, but it's possible that at a given + * instance a buffer is mapped. So to avoid stalling we tell pipebuffer to + * forbid creation of buffers beyond half the GMR pool size, + * + * XXX: It is unclear whether we want to limit the total amount of temporary + * malloc memory used to backup unvalidated GMR buffers. 
On one hand it is + * preferable to fail an allocation than exhausting the guest memory with + * temporary data, but on the other hand it is possible that a stupid + * application creates large vertex buffers and does not use them for a long + * time -- since the svga pipe driver only emits the DMA uploads when a + * buffer is used for drawing this would effectively disable swapping GMR + * buffers to memory. So far, the preemptive flush already seems to keep + * total allocated memory within relatively small numbers, so we don't + * limit. + */ vws->pools.gmr_fenced = fenced_bufmgr_create( vws->pools.gmr_mm, - vmw_fence_ops_create(vws)); + vmw_fence_ops_create(vws), + VMW_GMR_POOL_SIZE/2, + ~0); #ifdef DEBUG vws->pools.gmr_fenced = pb_debug_manager_create(vws->pools.gmr_fenced, diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c index d7d008859b3..2b4e80f0039 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c @@ -36,7 +36,7 @@ #include "svga_cmd.h" #include "svga3d_caps.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "pipebuffer/pb_buffer.h" diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.h b/src/gallium/winsys/drm/vmware/core/vmw_surface.h index 340cc1532e0..3d61595c288 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_surface.h +++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.h @@ -36,8 +36,8 @@ #include "pipe/p_compiler.h" -#include "pipe/p_atomic.h" -#include "pipe/p_refcnt.h" +#include "util/u_atomic.h" +#include "util/u_inlines.h" #define VMW_MAX_PRESENTS 3 @@ -45,7 +45,7 @@ struct vmw_svga_winsys_surface { - struct pipe_atomic validated; + int32_t validated; /* atomic */ struct pipe_reference refcnt; struct vmw_winsys_screen *screen; diff --git a/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h 
b/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h index 2be7e1249b6..1457966db81 100644 --- a/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h +++ b/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h @@ -68,7 +68,8 @@ #define DRM_VMW_PARAM_NUM_FREE_STREAMS 1 #define DRM_VMW_PARAM_3D 2 #define DRM_VMW_PARAM_FIFO_OFFSET 3 - +#define DRM_VMW_PARAM_HW_CAPS 4 +#define DRM_VMW_PARAM_FIFO_CAPS 5 /** * struct drm_vmw_getparam_arg @@ -87,49 +88,6 @@ struct drm_vmw_getparam_arg { /*************************************************************************/ /** - * DRM_VMW_EXTENSION - Query device extensions. - */ - -/** - * struct drm_vmw_extension_rep - * - * @exists: The queried extension exists. - * @driver_ioctl_offset: Ioctl number of the first ioctl in the extension. - * @driver_sarea_offset: Offset to any space in the DRI SAREA - * used by the extension. - * @major: Major version number of the extension. - * @minor: Minor version number of the extension. - * @pl: Patch level version number of the extension. - * - * Output argument to the DRM_VMW_EXTENSION Ioctl. - */ - -struct drm_vmw_extension_rep { - int32_t exists; - uint32_t driver_ioctl_offset; - uint32_t driver_sarea_offset; - uint32_t major; - uint32_t minor; - uint32_t pl; - uint32_t pad64; -}; - -/** - * union drm_vmw_extension_arg - * - * @extension - Ascii name of the extension to be queried. //In - * @rep - Reply as defined above. //Out - * - * Argument to the DRM_VMW_EXTENSION Ioctl. - */ - -union drm_vmw_extension_arg { - char extension[DRM_VMW_EXT_NAME_LEN]; - struct drm_vmw_extension_rep rep; -}; - -/*************************************************************************/ -/** * DRM_VMW_CREATE_CONTEXT - Create a host context. * * Allocates a device unique context id, and queues a create context command @@ -181,6 +139,8 @@ struct drm_vmw_context_arg { * The size of the array should equal the total number of mipmap levels. 
* @shareable: Boolean whether other clients (as identified by file descriptors) * may reference this surface. + * @scanout: Boolean whether the surface is intended to be used as a + * scanout. * * Input data to the DRM_VMW_CREATE_SURFACE Ioctl. * Output data from the DRM_VMW_REF_SURFACE Ioctl. @@ -192,7 +152,7 @@ struct drm_vmw_surface_create_req { uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES]; uint64_t size_addr; int32_t shareable; - uint32_t pad64; + int32_t scanout; }; /** @@ -295,6 +255,9 @@ union drm_vmw_surface_reference_arg { * * @commands: User-space address of a command buffer cast to an uint64_t. * @command-size: Size in bytes of the command buffer. + * @throttle-us: Sleep until software is less than @throttle_us + * microseconds ahead of hardware. The driver may round this value + * to the nearest kernel tick. * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an * uint64_t. * @@ -304,7 +267,7 @@ union drm_vmw_surface_reference_arg { struct drm_vmw_execbuf_arg { uint64_t commands; uint32_t command_size; - uint32_t pad64; + uint32_t throttle_us; uint64_t fence_rep; }; diff --git a/src/gallium/winsys/drm/vmware/egl/Makefile b/src/gallium/winsys/drm/vmware/egl/Makefile index 8e2980c318c..a3e73131c35 100644 --- a/src/gallium/winsys/drm/vmware/egl/Makefile +++ b/src/gallium/winsys/drm/vmware/egl/Makefile @@ -1,18 +1,14 @@ - TOP = ../../../../../.. 
include $(TOP)/configs/current -LIBNAME = EGL_svga.so +EGL_DRIVER_NAME = vmwgfx +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = -PIPE_DRIVERS = \ - $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \ +EGL_DRIVER_PIPES = \ $(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/svga/libsvga.a -C_SOURCES = \ - $(COMMON_GALLIUM_SOURCES) - -include ../../Makefile.template - -symlinks: +include ../../Makefile.egl diff --git a/src/gallium/winsys/drm/vmware/egl/dummy.c b/src/gallium/winsys/drm/vmware/egl/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/egl/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_video.c b/src/gallium/winsys/drm/vmware/xorg/vmw_video.c index b065b96346a..ff3b992d078 100644 --- a/src/gallium/winsys/drm/vmware/xorg/vmw_video.c +++ b/src/gallium/winsys/drm/vmware/xorg/vmw_video.c @@ -649,7 +649,8 @@ vmw_video_port_play(ScrnInfoPtr pScrn, struct vmw_video_port *port, return XvBadAlloc; } - port->currBuf = ++port->currBuf & (VMWARE_VID_NUM_BUFFERS - 1); + if (++(port->currBuf) >= VMWARE_VID_NUM_BUFFERS) + port->currBuf = 0; return Success; } diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c index 4b208719ca3..cd273d091fe 100644 --- a/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c +++ b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c @@ -34,10 +34,10 @@ #include "vmw_hook.h" static void vmw_xorg_identify(int flags); -static Bool vmw_xorg_pci_probe(DriverPtr driver, - int entity_num, - struct pci_device *device, - intptr_t match_data); +_X_EXPORT Bool vmw_xorg_pci_probe(DriverPtr driver, + int entity_num, + struct pci_device *device, + intptr_t match_data); static const struct pci_id_match vmw_xorg_device_match[] = { {0x15ad, PCI_MATCH_ANY, 
PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0}, @@ -126,7 +126,7 @@ vmw_xorg_identify(int flags) vmw_xorg_chipsets); } -static Bool +_X_EXPORT Bool vmw_xorg_pci_probe(DriverPtr driver, int entity_num, struct pci_device *device, intptr_t match_data) { diff --git a/src/gallium/winsys/egl_xlib/Makefile b/src/gallium/winsys/egl_xlib/Makefile deleted file mode 100644 index 3efb7ed4afa..00000000000 --- a/src/gallium/winsys/egl_xlib/Makefile +++ /dev/null @@ -1,89 +0,0 @@ -# src/gallium/winsys/egl_xlib/Makefile - -# Build softpipe/xlib/EGL driver library/object: "egl_softpipe.so" - - -TOP = ../../../.. -include $(TOP)/configs/current - - -DRIVER_NAME = egl_softpipe.so - - -INCLUDE_DIRS = \ - -I$(TOP)/include \ - -I$(TOP)/src/egl/main \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/auxiliary - -WINSYS_SOURCES = \ - egl_xlib.c \ - sw_winsys.c - -WINSYS_OBJECTS = $(WINSYS_SOURCES:.c=.o) - - -LIBS = \ - $(GALLIUM_DRIVERS) \ - $(GALLIUM_AUXILIARIES) - -# XXX temporary (should create a separate lib with the GL API funcs and -# mesa code, as done for ES 1.x, 2.x, OpenVG, etc) -UNUSED_LIBS = \ - $(TOP)/src/mesa/libglapi.a \ - $(TOP)/src/mesa/libmesagallium.a \ - - -LOCAL_CFLAGS = - - -.c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(LOCAL_CFLAGS) $< -o $@ - - -.PHONY: library - - -default: depend library Makefile - - -library: $(TOP)/$(LIB_DIR)/$(DRIVER_NAME) - - -# Make the egl_softpipe.so library -$(TOP)/$(LIB_DIR)/$(DRIVER_NAME): $(WINSYS_OBJECTS) $(LIBS) - $(TOP)/bin/mklib -o $(DRIVER_NAME) \ - -linker "$(CC)" \ - -noprefix \ - -install $(TOP)/$(LIB_DIR) \ - $(MKLIB_OPTIONS) $(WINSYS_OBJECTS) \ - -Wl,--whole-archive $(LIBS) -Wl,--no-whole-archive - - -depend: $(ALL_SOURCES) - @ echo "running $(MKDEP)" - @ rm -f depend # workaround oops on gutsy?!? 
- @ touch depend - @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ - > /dev/null 2>/dev/null - - -install: default - $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR) - @if [ -e $(TOP)/$(LIB_DIR) ]; then \ - $(MINSTALL) $(TOP)/$(LIB_DIR)/$(DRIVER_NAME) $(INSTALL_DIR)/$(LIB_DIR); \ - fi - - -# Emacs tags -tags: - etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h - -clean: - -rm -f *.o *~ *.bak - - -include depend diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c deleted file mode 100644 index 420dccc92c9..00000000000 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ /dev/null @@ -1,853 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * EGL / softpipe / xlib winsys module - * - * Authors: Brian Paul - */ - - -#include <dlfcn.h> -#include <X11/Xlib.h> -#include <X11/Xutil.h> - -#include "pipe/p_compiler.h" -#include "pipe/p_format.h" -#include "pipe/p_state.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "softpipe/sp_winsys.h" -#include "softpipe/sp_texture.h" - -#include "eglconfig.h" -#include "eglconfigutil.h" -#include "eglcontext.h" -#include "egldisplay.h" -#include "egldriver.h" -#include "eglglobals.h" -#include "egllog.h" -#include "eglsurface.h" - -#include "state_tracker/st_public.h" - -#include "sw_winsys.h" - - -/** subclass of _EGLDriver */ -struct xlib_egl_driver -{ - _EGLDriver Base; /**< base class */ - EGLint apis; -}; - - -/** driver data of _EGLDisplay */ -struct xlib_egl_display -{ - Display *Dpy; - - struct pipe_winsys *winsys; - struct pipe_screen *screen; -}; - - -/** subclass of _EGLContext */ -struct xlib_egl_context -{ - _EGLContext Base; /**< base class */ - - struct pipe_context *pipe; /**< Gallium driver context */ - struct st_context *Context; /**< Mesa/gallium state tracker context */ -}; - - -/** subclass of _EGLSurface */ -struct xlib_egl_surface -{ - _EGLSurface Base; /**< base class */ - - /* These are set for window surface */ - Display *Dpy; /**< The X Display of the window */ - Window Win; /**< The user-created window ID */ - GC Gc; - XVisualInfo VisInfo; - - struct pipe_winsys *winsys; - - struct st_framebuffer *Framebuffer; -}; - - -static void -flush_frontbuffer(struct pipe_winsys *pws, - struct pipe_surface *psurf, - void *context_private); - - -/** cast wrapper */ -static INLINE struct xlib_egl_driver * -xlib_egl_driver(_EGLDriver *drv) -{ - return (struct xlib_egl_driver *) drv; -} - - -static INLINE struct xlib_egl_display * -xlib_egl_display(_EGLDisplay *dpy) -{ - return (struct xlib_egl_display *) 
dpy->DriverData; -} - - -static INLINE struct xlib_egl_surface * -lookup_surface(_EGLSurface *surf) -{ - return (struct xlib_egl_surface *) surf; -} - - -static INLINE struct xlib_egl_context * -lookup_context(_EGLContext *ctx) -{ - return (struct xlib_egl_context *) ctx; -} - - -/** - * Create the EGLConfigs. (one per X visual) - */ -static void -create_configs(struct xlib_egl_display *xdpy, _EGLDisplay *disp) -{ - static const EGLint all_apis = (EGL_OPENGL_ES_BIT | - EGL_OPENGL_ES2_BIT | - EGL_OPENVG_BIT | - EGL_OPENGL_BIT); - XVisualInfo *visInfo, visTemplate; - int num_visuals, i; - - /* get list of all X visuals, create an EGL config for each */ - visTemplate.screen = DefaultScreen(xdpy->Dpy); - visInfo = XGetVisualInfo(xdpy->Dpy, VisualScreenMask, - &visTemplate, &num_visuals); - if (!visInfo) { - printf("egl_xlib.c: couldn't get any X visuals\n"); - abort(); - } - - for (i = 0; i < num_visuals; i++) { - _EGLConfig *config = calloc(1, sizeof(_EGLConfig)); - int id = i + 1; - int rbits = util_bitcount(visInfo[i].red_mask); - int gbits = util_bitcount(visInfo[i].green_mask); - int bbits = util_bitcount(visInfo[i].blue_mask); - int abits = bbits == 8 ? 
8 : 0; - int zbits = 24; - int sbits = 8; - int visid = visInfo[i].visualid; -#if defined(__cplusplus) || defined(c_plusplus) - int vistype = visInfo[i].c_class; -#else - int vistype = visInfo[i].class; -#endif - - _eglInitConfig(config, id); - SET_CONFIG_ATTRIB(config, EGL_BUFFER_SIZE, rbits + gbits + bbits + abits); - SET_CONFIG_ATTRIB(config, EGL_RED_SIZE, rbits); - SET_CONFIG_ATTRIB(config, EGL_GREEN_SIZE, gbits); - SET_CONFIG_ATTRIB(config, EGL_BLUE_SIZE, bbits); - SET_CONFIG_ATTRIB(config, EGL_ALPHA_SIZE, abits); - SET_CONFIG_ATTRIB(config, EGL_DEPTH_SIZE, zbits); - SET_CONFIG_ATTRIB(config, EGL_STENCIL_SIZE, sbits); - SET_CONFIG_ATTRIB(config, EGL_NATIVE_VISUAL_ID, visid); - SET_CONFIG_ATTRIB(config, EGL_NATIVE_VISUAL_TYPE, vistype); - SET_CONFIG_ATTRIB(config, EGL_NATIVE_RENDERABLE, EGL_FALSE); - SET_CONFIG_ATTRIB(config, EGL_CONFORMANT, all_apis); - SET_CONFIG_ATTRIB(config, EGL_RENDERABLE_TYPE, all_apis); - SET_CONFIG_ATTRIB(config, EGL_SURFACE_TYPE, EGL_WINDOW_BIT | EGL_PBUFFER_BIT); - SET_CONFIG_ATTRIB(config, EGL_BIND_TO_TEXTURE_RGBA, EGL_TRUE); - SET_CONFIG_ATTRIB(config, EGL_BIND_TO_TEXTURE_RGB, EGL_TRUE); - - _eglAddConfig(disp, config); - } - - XFree(visInfo); -} - - -/** - * Called via eglInitialize(), drv->API.Initialize(). 
- */ -static EGLBoolean -xlib_eglInitialize(_EGLDriver *drv, _EGLDisplay *dpy, - EGLint *major, EGLint *minor) -{ - struct xlib_egl_driver *xdrv = xlib_egl_driver(drv); - struct xlib_egl_display *xdpy; - - xdpy = CALLOC_STRUCT(xlib_egl_display); - if (!xdpy) - return _eglError(EGL_BAD_ALLOC, "eglInitialize"); - - xdpy->Dpy = (Display *) dpy->NativeDisplay; - if (!xdpy->Dpy) { - xdpy->Dpy = XOpenDisplay(NULL); - if (!xdpy->Dpy) { - free(xdpy); - return EGL_FALSE; - } - } - - /* create winsys and pipe screen */ - xdpy->winsys = create_sw_winsys(); - if (!xdpy->winsys) { - free(xdpy); - return _eglError(EGL_BAD_ALLOC, "eglInitialize"); - } - xdpy->winsys->flush_frontbuffer = flush_frontbuffer; - xdpy->screen = softpipe_create_screen(xdpy->winsys); - if (!xdpy->screen) { - free(xdpy->winsys); - free(xdpy); - return _eglError(EGL_BAD_ALLOC, "eglInitialize"); - } - - dpy->DriverData = (void *) xdpy; - dpy->ClientAPIsMask = xdrv->apis; - - create_configs(xdpy, dpy); - - /* we're supporting EGL 1.4 */ - *major = 1; - *minor = 4; - - return EGL_TRUE; -} - - -/** - * Called via eglTerminate(), drv->API.Terminate(). 
- */ -static EGLBoolean -xlib_eglTerminate(_EGLDriver *drv, _EGLDisplay *dpy) -{ - struct xlib_egl_display *xdpy = xlib_egl_display(dpy); - - _eglReleaseDisplayResources(drv, dpy); - _eglCleanupDisplay(dpy); - - xdpy->screen->destroy(xdpy->screen); - free(xdpy->winsys); - - if (!dpy->NativeDisplay) - XCloseDisplay(xdpy->Dpy); - free(xdpy); - - return EGL_TRUE; -} - - -static _EGLProc -xlib_eglGetProcAddress(const char *procname) -{ - return (_EGLProc) st_get_proc_address(procname); -} - - -static void -get_drawable_visual_info(Display *dpy, Drawable d, XVisualInfo *visInfo) -{ - XWindowAttributes attr; - XVisualInfo visTemp, *vis; - int num_visuals; - - XGetWindowAttributes(dpy, d, &attr); - - visTemp.screen = DefaultScreen(dpy); - visTemp.visualid = attr.visual->visualid; - vis = XGetVisualInfo(dpy, - (VisualScreenMask | VisualIDMask), - &visTemp, &num_visuals); - if (vis) - *visInfo = *vis; - - XFree(vis); -} - - - -/** Get size of given window */ -static Status -get_drawable_size(Display *dpy, Drawable d, uint *width, uint *height) -{ - Window root; - Status stat; - int xpos, ypos; - unsigned int w, h, bw, depth; - stat = XGetGeometry(dpy, d, &root, &xpos, &ypos, &w, &h, &bw, &depth); - *width = w; - *height = h; - return stat; -} - - -static void -check_and_update_buffer_size(struct xlib_egl_surface *surface) -{ - uint width, height; - if (surface->Base.Type == EGL_PBUFFER_BIT) { - width = surface->Base.Width; - height = surface->Base.Height; - } - else { - get_drawable_size(surface->Dpy, surface->Win, &width, &height); - } - st_resize_framebuffer(surface->Framebuffer, width, height); - surface->Base.Width = width; - surface->Base.Height = height; -} - - - -static void -display_surface(struct pipe_winsys *pws, - struct pipe_surface *psurf, - struct xlib_egl_surface *xsurf) -{ - struct softpipe_texture *spt = softpipe_texture(psurf->texture); - XImage *ximage; - void *data; - - if (xsurf->Base.Type == EGL_PBUFFER_BIT) - return; - - ximage = 
XCreateImage(xsurf->Dpy, - xsurf->VisInfo.visual, - xsurf->VisInfo.depth, - ZPixmap, 0, /* format, offset */ - NULL, /* data */ - 0, 0, /* size */ - 32, /* bitmap_pad */ - 0); /* bytes_per_line */ - - - assert(ximage->format); - assert(ximage->bitmap_unit); - - data = pws->buffer_map(pws, spt->buffer, 0); - - /* update XImage's fields */ - ximage->data = data; - ximage->width = psurf->width; - ximage->height = psurf->height; - ximage->bytes_per_line = spt->stride[psurf->level]; - - XPutImage(xsurf->Dpy, xsurf->Win, xsurf->Gc, - ximage, 0, 0, 0, 0, psurf->width, psurf->height); - - XSync(xsurf->Dpy, 0); - - ximage->data = NULL; - XDestroyImage(ximage); - - pws->buffer_unmap(pws, spt->buffer); -} - - - -/** Display gallium surface in X window */ -static void -flush_frontbuffer(struct pipe_winsys *pws, - struct pipe_surface *psurf, - void *context_private) -{ - struct xlib_egl_surface *xsurf = (struct xlib_egl_surface *) context_private; - display_surface(pws, psurf, xsurf); -} - - - -/** - * Called via eglCreateContext(), drv->API.CreateContext(). 
- */ -static _EGLContext * -xlib_eglCreateContext(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, - _EGLContext *share_list, const EGLint *attrib_list) -{ - struct xlib_egl_display *xdpy = xlib_egl_display(dpy); - struct xlib_egl_context *ctx; - struct st_context *share_ctx = NULL; /* XXX fix */ - __GLcontextModes visual; - - ctx = CALLOC_STRUCT(xlib_egl_context); - if (!ctx) - return NULL; - - /* let EGL lib init the common stuff */ - if (!_eglInitContext(drv, &ctx->Base, conf, attrib_list)) { - free(ctx); - return NULL; - } - - /* API-dependent context creation */ - switch (ctx->Base.ClientAPI) { - case EGL_OPENVG_API: - case EGL_OPENGL_ES_API: - _eglLog(_EGL_DEBUG, "Create Context for ES version %d\n", - ctx->Base.ClientVersion); - /* fall-through */ - case EGL_OPENGL_API: - /* create a softpipe context */ - ctx->pipe = softpipe_create(xdpy->screen); - /* Now do xlib / state tracker inits here */ - _eglConfigToContextModesRec(conf, &visual); - ctx->Context = st_create_context(ctx->pipe, &visual, share_ctx); - break; - default: - _eglError(EGL_BAD_MATCH, "eglCreateContext(unsupported API)"); - free(ctx); - return NULL; - } - - return &ctx->Base; -} - - -static EGLBoolean -xlib_eglDestroyContext(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) -{ - struct xlib_egl_context *context = lookup_context(ctx); - - if (!_eglIsContextBound(&context->Base)) { - /* API-dependent clean-up */ - switch (context->Base.ClientAPI) { - case EGL_OPENGL_ES_API: - case EGL_OPENVG_API: - /* fall-through */ - case EGL_OPENGL_API: - st_destroy_context(context->Context); - break; - default: - assert(0); - } - free(context); - } - return EGL_TRUE; -} - - -/** - * Called via eglMakeCurrent(), drv->API.MakeCurrent(). 
- */ -static EGLBoolean -xlib_eglMakeCurrent(_EGLDriver *drv, _EGLDisplay *dpy, - _EGLSurface *draw, _EGLSurface *read, _EGLContext *ctx) -{ - struct xlib_egl_context *context = lookup_context(ctx); - struct xlib_egl_surface *draw_surf = lookup_surface(draw); - struct xlib_egl_surface *read_surf = lookup_surface(read); - struct st_context *oldcontext = NULL; - _EGLContext *oldctx; - - oldctx = _eglGetCurrentContext(); - if (oldctx && _eglIsContextLinked(oldctx)) - oldcontext = st_get_current(); - - if (!_eglMakeCurrent(drv, dpy, draw, read, ctx)) - return EGL_FALSE; - - /* Flush before switching context. Check client API? */ - if (oldcontext) - st_flush(oldcontext, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); - st_make_current((context ? context->Context : NULL), - (draw_surf ? draw_surf->Framebuffer : NULL), - (read_surf ? read_surf->Framebuffer : NULL)); - - if (draw_surf) - check_and_update_buffer_size(draw_surf); - if (read_surf && read_surf != draw_surf) - check_and_update_buffer_size(draw_surf); - - return EGL_TRUE; -} - - -static enum pipe_format -choose_color_format(const __GLcontextModes *visual) -{ - if (visual->redBits == 8 && - visual->greenBits == 8 && - visual->blueBits == 8 && - visual->alphaBits == 8) { - /* XXX this really also depends on the ordering of R,G,B,A */ - return PIPE_FORMAT_A8R8G8B8_UNORM; - } - else { - assert(0); - return PIPE_FORMAT_NONE; - } -} - - -static enum pipe_format -choose_depth_format(const __GLcontextModes *visual) -{ - if (visual->depthBits > 0) - return PIPE_FORMAT_S8Z24_UNORM; - else - return PIPE_FORMAT_NONE; -} - - -static enum pipe_format -choose_stencil_format(const __GLcontextModes *visual) -{ - if (visual->stencilBits > 0) - return PIPE_FORMAT_S8Z24_UNORM; - else - return PIPE_FORMAT_NONE; -} - - -/** - * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). 
- */ -static _EGLSurface * -xlib_eglCreateWindowSurface(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, - NativeWindowType window, const EGLint *attrib_list) -{ - struct xlib_egl_display *xdpy = xlib_egl_display(disp); - struct xlib_egl_surface *surf; - __GLcontextModes visual; - uint width, height; - - surf = CALLOC_STRUCT(xlib_egl_surface); - if (!surf) - return NULL; - - /* Let EGL lib init the common stuff */ - if (!_eglInitSurface(drv, &surf->Base, EGL_WINDOW_BIT, - conf, attrib_list)) { - free(surf); - return NULL; - } - - /* - * Now init the Xlib and gallium stuff - */ - surf->Win = (Window) window; /* The X window ID */ - surf->Dpy = xdpy->Dpy; /* The X display */ - surf->Gc = XCreateGC(surf->Dpy, surf->Win, 0, NULL); - - surf->winsys = xdpy->winsys; - - _eglConfigToContextModesRec(conf, &visual); - get_drawable_size(surf->Dpy, surf->Win, &width, &height); - get_drawable_visual_info(surf->Dpy, surf->Win, &surf->VisInfo); - - surf->Base.Width = width; - surf->Base.Height = height; - - /* Create GL statetracker framebuffer */ - surf->Framebuffer = st_create_framebuffer(&visual, - choose_color_format(&visual), - choose_depth_format(&visual), - choose_stencil_format(&visual), - width, height, - (void *) surf); - - st_resize_framebuffer(surf->Framebuffer, width, height); - - return &surf->Base; -} - - -static _EGLSurface * -xlib_eglCreatePbufferSurface(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, - const EGLint *attrib_list) -{ - struct xlib_egl_display *xdpy = xlib_egl_display(disp); - struct xlib_egl_surface *surf; - __GLcontextModes visual; - uint width, height; - EGLBoolean bind_texture; - - surf = CALLOC_STRUCT(xlib_egl_surface); - if (!surf) { - _eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface"); - return NULL; - } - - if (!_eglInitSurface(drv, &surf->Base, EGL_PBUFFER_BIT, - conf, attrib_list)) { - free(surf); - return NULL; - } - if (surf->Base.Width < 0 || surf->Base.Height < 0) { - _eglError(EGL_BAD_PARAMETER, 
"eglCreatePbufferSurface"); - free(surf); - return NULL; - } - - bind_texture = (surf->Base.TextureFormat != EGL_NO_TEXTURE); - width = (uint) surf->Base.Width; - height = (uint) surf->Base.Height; - if ((surf->Base.TextureTarget == EGL_NO_TEXTURE && bind_texture) || - (surf->Base.TextureTarget != EGL_NO_TEXTURE && !bind_texture)) { - _eglError(EGL_BAD_MATCH, "eglCreatePbufferSurface"); - free(surf); - return NULL; - } - /* a framebuffer of zero width or height confuses st */ - if (width == 0 || height == 0) { - _eglError(EGL_BAD_MATCH, "eglCreatePbufferSurface"); - free(surf); - return NULL; - } - /* no mipmap generation */ - if (surf->Base.MipmapTexture) { - _eglError(EGL_BAD_MATCH, "eglCreatePbufferSurface"); - free(surf); - return NULL; - } - - surf->winsys = xdpy->winsys; - - _eglConfigToContextModesRec(conf, &visual); - - /* Create GL statetracker framebuffer */ - surf->Framebuffer = st_create_framebuffer(&visual, - choose_color_format(&visual), - choose_depth_format(&visual), - choose_stencil_format(&visual), - width, height, - (void *) surf); - st_resize_framebuffer(surf->Framebuffer, width, height); - - return &surf->Base; -} - - -static EGLBoolean -xlib_eglDestroySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface) -{ - struct xlib_egl_surface *surf = lookup_surface(surface); - if (!_eglIsSurfaceBound(&surf->Base)) { - if (surf->Base.Type != EGL_PBUFFER_BIT) - XFreeGC(surf->Dpy, surf->Gc); - st_unreference_framebuffer(surf->Framebuffer); - free(surf); - } - return EGL_TRUE; -} - - -static EGLBoolean -xlib_eglBindTexImage(_EGLDriver *drv, _EGLDisplay *dpy, - _EGLSurface *surface, EGLint buffer) -{ - struct xlib_egl_surface *xsurf = lookup_surface(surface); - struct xlib_egl_context *xctx; - struct pipe_surface *psurf; - enum pipe_format format; - int target; - - if (!xsurf || xsurf->Base.Type != EGL_PBUFFER_BIT) - return _eglError(EGL_BAD_SURFACE, "eglBindTexImage"); - if (buffer != EGL_BACK_BUFFER) - return _eglError(EGL_BAD_PARAMETER, 
"eglBindTexImage"); - if (xsurf->Base.BoundToTexture) - return _eglError(EGL_BAD_ACCESS, "eglBindTexImage"); - - /* this should be updated when choose_color_format is */ - switch (xsurf->Base.TextureFormat) { - case EGL_TEXTURE_RGB: - format = PIPE_FORMAT_R8G8B8_UNORM; - break; - case EGL_TEXTURE_RGBA: - format = PIPE_FORMAT_A8R8G8B8_UNORM; - break; - default: - return _eglError(EGL_BAD_MATCH, "eglBindTexImage"); - } - - switch (xsurf->Base.TextureTarget) { - case EGL_TEXTURE_2D: - target = ST_TEXTURE_2D; - break; - default: - return _eglError(EGL_BAD_MATCH, "eglBindTexImage"); - } - - /* flush properly */ - if (eglGetCurrentSurface(EGL_DRAW) == surface) { - xctx = lookup_context(_eglGetCurrentContext()); - st_flush(xctx->Context, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, - NULL); - } - else if (_eglIsSurfaceBound(&xsurf->Base)) { - xctx = lookup_context(xsurf->Base.Binding); - if (xctx) - st_finish(xctx->Context); - } - - st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT, - &psurf); - st_bind_texture_surface(psurf, target, xsurf->Base.MipmapLevel, format); - xsurf->Base.BoundToTexture = EGL_TRUE; - - return EGL_TRUE; -} - - -static EGLBoolean -xlib_eglReleaseTexImage(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, - EGLint buffer) -{ - struct xlib_egl_surface *xsurf = lookup_surface(surface); - struct pipe_surface *psurf; - - if (!xsurf || xsurf->Base.Type != EGL_PBUFFER_BIT || - !xsurf->Base.BoundToTexture) - return _eglError(EGL_BAD_SURFACE, "eglReleaseTexImage"); - if (buffer != EGL_BACK_BUFFER) - return _eglError(EGL_BAD_PARAMETER, "eglReleaseTexImage"); - - st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT, - &psurf); - st_unbind_texture_surface(psurf, ST_TEXTURE_2D, xsurf->Base.MipmapLevel); - xsurf->Base.BoundToTexture = EGL_FALSE; - - return EGL_TRUE; -} - - -static EGLBoolean -xlib_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw) -{ - struct xlib_egl_surface *xsurf = lookup_surface(draw); 
- struct pipe_winsys *pws = xsurf->winsys; - struct pipe_surface *psurf; - - st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT, - &psurf); - - st_notify_swapbuffers(xsurf->Framebuffer); - - display_surface(pws, psurf, xsurf); - - check_and_update_buffer_size(xsurf); - - return EGL_TRUE; -} - - -/** - * Determine which API(s) is(are) present by looking for some specific - * global symbols. - */ -static EGLint -find_supported_apis(void) -{ - EGLint mask = 0; - void *handle; - - handle = dlopen(NULL, RTLD_LAZY | RTLD_LOCAL); - if(!handle) - return mask; - - if (dlsym(handle, "st_api_OpenGL_ES1")) - mask |= EGL_OPENGL_ES_BIT; - - if (dlsym(handle, "st_api_OpenGL_ES2")) - mask |= EGL_OPENGL_ES2_BIT; - - if (dlsym(handle, "st_api_OpenGL")) - mask |= EGL_OPENGL_BIT; - - if (dlsym(handle, "st_api_OpenVG")) - mask |= EGL_OPENVG_BIT; - - dlclose(handle); - - return mask; -} - - -static void -xlib_Unload(_EGLDriver *drv) -{ - struct xlib_egl_driver *xdrv = xlib_egl_driver(drv); - free(xdrv); -} - - -/** - * This is the main entrypoint into the driver. - * Called by libEGL to instantiate an _EGLDriver object. 
- */ -_EGLDriver * -_eglMain(const char *args) -{ - struct xlib_egl_driver *xdrv; - - _eglLog(_EGL_INFO, "Entering EGL/Xlib _eglMain(%s)", args); - - xdrv = CALLOC_STRUCT(xlib_egl_driver); - if (!xdrv) - return NULL; - - _eglInitDriverFallbacks(&xdrv->Base); - xdrv->Base.API.Initialize = xlib_eglInitialize; - xdrv->Base.API.Terminate = xlib_eglTerminate; - xdrv->Base.API.GetProcAddress = xlib_eglGetProcAddress; - xdrv->Base.API.CreateContext = xlib_eglCreateContext; - xdrv->Base.API.DestroyContext = xlib_eglDestroyContext; - xdrv->Base.API.CreateWindowSurface = xlib_eglCreateWindowSurface; - xdrv->Base.API.CreatePbufferSurface = xlib_eglCreatePbufferSurface; - xdrv->Base.API.DestroySurface = xlib_eglDestroySurface; - xdrv->Base.API.BindTexImage = xlib_eglBindTexImage; - xdrv->Base.API.ReleaseTexImage = xlib_eglReleaseTexImage; - xdrv->Base.API.MakeCurrent = xlib_eglMakeCurrent; - xdrv->Base.API.SwapBuffers = xlib_eglSwapBuffers; - - xdrv->apis = find_supported_apis(); - if (xdrv->apis == 0x0) { - /* the app isn't directly linked with any EGL-supprted APIs - * (such as libGLESv2.so) so use an EGL utility to see what - * APIs might be loaded dynamically on this system. 
- */ - xdrv->apis = _eglFindAPIs(); - } - - xdrv->Base.Name = "Xlib/softpipe"; - xdrv->Base.Unload = xlib_Unload; - - return &xdrv->Base; -} diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile index 3965bd949f4..f07a7926d63 100644 --- a/src/gallium/winsys/g3dvl/nouveau/Makefile +++ b/src/gallium/winsys/g3dvl/nouveau/Makefile @@ -20,14 +20,11 @@ LDFLAGS += -L${DRMDIR}/lib \ -L${DRIDIR}/lib \ -L${GALLIUMDIR}/winsys/drm/nouveau/common \ -L${GALLIUMDIR}/auxiliary \ - -L${GALLIUMDIR}/drivers/nv04 \ - -L${GALLIUMDIR}/drivers/nv10 \ - -L${GALLIUMDIR}/drivers/nv20 \ -L${GALLIUMDIR}/drivers/nv30 \ -L${GALLIUMDIR}/drivers/nv40 \ -L${GALLIUMDIR}/drivers/nv50 -LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -lgallium -lm +LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv30 -lnv40 -lnv50 -lgallium -lm ############################################# diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c index f15bcd37b50..048af62ed30 100644 --- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c +++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c @@ -27,9 +27,9 @@ #include <vl_winsys.h> #include <X11/Xutil.h> -#include <pipe/internal/p_winsys_screen.h> +#include <util/u_simple_screen.h> #include <pipe/p_state.h> -#include <pipe/p_inlines.h> +#include <util/u_inlines.h> #include <util/u_format.h> #include <util/u_memory.h> #include <util/u_math.h> diff --git a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c index 7d076be3a31..03dbd76c375 100644 --- a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c @@ -38,7 +38,7 @@ #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -216,11 +216,6 @@ no_winsys: } -static struct 
pipe_context * -gdi_llvmpipe_context_create(struct pipe_screen *screen) -{ - return llvmpipe_create(screen); -} static void @@ -243,7 +238,6 @@ gdi_llvmpipe_present(struct pipe_screen *screen, static const struct stw_winsys stw_winsys = { &gdi_llvmpipe_screen_create, - &gdi_llvmpipe_context_create, &gdi_llvmpipe_present, NULL, /* get_adapter_luid */ NULL, /* shared_surface_open */ diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 2ad794c3f0f..2078020f8f7 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -38,10 +38,10 @@ #include <windows.h> -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -249,13 +249,6 @@ gdi_softpipe_screen_create(void) } -static struct pipe_context * -gdi_softpipe_context_create(struct pipe_screen *screen) -{ - return softpipe_create(screen); -} - - static void gdi_softpipe_present(struct pipe_screen *screen, struct pipe_surface *surface, @@ -291,7 +284,6 @@ gdi_softpipe_present(struct pipe_screen *screen, static const struct stw_winsys stw_winsys = { &gdi_softpipe_screen_create, - &gdi_softpipe_context_create, &gdi_softpipe_present, NULL, /* get_adapter_luid */ NULL, /* shared_surface_open */ diff --git a/src/gallium/winsys/xlib/xlib.c b/src/gallium/winsys/xlib/xlib.c index 6dbe05f193e..67617a470d6 100644 --- a/src/gallium/winsys/xlib/xlib.c +++ b/src/gallium/winsys/xlib/xlib.c @@ -47,6 +47,8 @@ enum mode { MODE_SOFTPIPE }; +/* advertise OpenGL support */ +PUBLIC const int st_api_OpenGL = 1; static enum mode get_mode() { @@ -103,3 +105,34 @@ extern void (*linker_foo(const unsigned char *procName))() { return glXGetProcAddress(procName); } + + +/** + * When GLX_INDIRECT_RENDERING is defined, some symbols 
are missing in + * libglapi.a. We need to define them here. + */ +#ifdef GLX_INDIRECT_RENDERING + +#define GL_GLEXT_PROTOTYPES +#include "GL/gl.h" +#include "glapi/glapi.h" +#include "glapi/glapitable.h" +#include "glapi/glapidispatch.h" + +#if defined(USE_MGL_NAMESPACE) +#define NAME(func) mgl##func +#else +#define NAME(func) gl##func +#endif + +#define DISPATCH(FUNC, ARGS, MESSAGE) \ + CALL_ ## FUNC(GET_DISPATCH(), ARGS); + +#define RETURN_DISPATCH(FUNC, ARGS, MESSAGE) \ + return CALL_ ## FUNC(GET_DISPATCH(), ARGS); + +/* skip normal ones */ +#define _GLAPI_SKIP_NORMAL_ENTRY_POINTS +#include "glapi/glapitemp.h" + +#endif /* GLX_INDIRECT_RENDERING */ diff --git a/src/gallium/winsys/xlib/xlib_brw_context.c b/src/gallium/winsys/xlib/xlib_brw_context.c index fc9addd09e3..22bf41a46f7 100644 --- a/src/gallium/winsys/xlib/xlib_brw_context.c +++ b/src/gallium/winsys/xlib/xlib_brw_context.c @@ -36,8 +36,8 @@ /* #include "glxheader.h" */ /* #include "xmesaP.h" */ -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" +#include "util/u_simple_screen.h" +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "i965simple/brw_winsys.h" diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c index 47ae0519a4b..1dc9e8fa11f 100644 --- a/src/gallium/winsys/xlib/xlib_cell.c +++ b/src/gallium/winsys/xlib/xlib_cell.c @@ -41,10 +41,10 @@ #undef ASSERT #undef Elements -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -383,35 +383,10 @@ fail: } -static struct pipe_context * -xlib_create_cell_context( struct pipe_screen *screen, - void *priv ) -{ - struct pipe_context *pipe; - - - /* This takes a cell_winsys pointer, but probably that should be - * created and stored at screen creation, 
not context creation. - * - * The actual cell_winsys value isn't used for anything, so just - * passing NULL for now. - */ - pipe = cell_create_context( screen, NULL); - if (pipe == NULL) - goto fail; - - pipe->priv = priv; - - return pipe; - -fail: - return NULL; -} struct xm_driver xlib_cell_driver = { .create_pipe_screen = xlib_create_cell_screen, - .create_pipe_context = xlib_create_cell_context, .display_surface = xlib_cell_display_surface, }; @@ -420,7 +395,6 @@ struct xm_driver xlib_cell_driver = struct xm_driver xlib_cell_driver = { .create_pipe_screen = NULL, - .create_pipe_context = NULL, .display_surface = NULL, }; diff --git a/src/gallium/winsys/xlib/xlib_llvmpipe.c b/src/gallium/winsys/xlib/xlib_llvmpipe.c index 2a434b5fd21..6cebd4c2012 100644 --- a/src/gallium/winsys/xlib/xlib_llvmpipe.c +++ b/src/gallium/winsys/xlib/xlib_llvmpipe.c @@ -40,10 +40,10 @@ #undef ASSERT #undef Elements -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -419,25 +419,6 @@ fail: } -static struct pipe_context * -xlib_create_llvmpipe_context( struct pipe_screen *screen, - void *context_private ) -{ - struct pipe_context *pipe; - - pipe = llvmpipe_create(screen); - if (pipe == NULL) - goto fail; - - pipe->priv = context_private; - return pipe; - -fail: - /* Free stuff here */ - return NULL; -} - - static void xlib_llvmpipe_display_surface(struct xmesa_buffer *xm_buffer, struct pipe_surface *surf) @@ -453,7 +434,6 @@ xlib_llvmpipe_display_surface(struct xmesa_buffer *xm_buffer, struct xm_driver xlib_llvmpipe_driver = { .create_pipe_screen = xlib_create_llvmpipe_screen, - .create_pipe_context = xlib_create_llvmpipe_context, .display_surface = xlib_llvmpipe_display_surface }; diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c 
b/src/gallium/winsys/xlib/xlib_softpipe.c index f7c0099584e..716338aef47 100644 --- a/src/gallium/winsys/xlib/xlib_softpipe.c +++ b/src/gallium/winsys/xlib/xlib_softpipe.c @@ -38,10 +38,10 @@ #undef ASSERT #undef Elements -#include "pipe/internal/p_winsys_screen.h" +#include "util/u_simple_screen.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -63,7 +63,7 @@ struct xm_buffer XImage *tempImage; #ifdef USE_XSHM - int shm; + boolean shm; /** Is this a shared memory buffer? */ XShmSegmentInfo shminfo; #endif }; @@ -152,7 +152,7 @@ alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, &b->shminfo, width, height); if (b->tempImage == NULL) { - b->shm = 0; + b->shm = FALSE; return; } @@ -169,12 +169,12 @@ alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, mesaXErrorFlag = 0; XDestroyImage(b->tempImage); b->tempImage = NULL; - b->shm = 0; + b->shm = FALSE; (void) XSetErrorHandler(old_handler); return; } - b->shm = 1; + b->shm = TRUE; } #endif /* USE_XSHM */ @@ -204,6 +204,14 @@ xm_buffer_destroy(struct pipe_buffer *buf) { struct xm_buffer *oldBuf = xm_buffer(buf); + /* + * Note oldBuf->data may point to one of three things: + * 1. XShm shared memory image data + * 2. User-provided (wrapped) memory, see xm_user_buffer_create() + * 3. Regular, malloc'd memory + * We need to be careful with freeing that data now. 
+ */ + if (oldBuf->data) { #ifdef USE_XSHM if (oldBuf->shminfo.shmid >= 0) { @@ -213,12 +221,20 @@ xm_buffer_destroy(struct pipe_buffer *buf) oldBuf->shminfo.shmid = -1; oldBuf->shminfo.shmaddr = (char *) -1; } - else + + if (oldBuf->shm) { + oldBuf->data = NULL; + } + + if (oldBuf->tempImage) { + XDestroyImage(oldBuf->tempImage); + oldBuf->tempImage = NULL; + } #endif - { - if (!oldBuf->userBuffer) { - align_free(oldBuf->data); - } + + if (oldBuf->data && !oldBuf->userBuffer) { + /* this was regular malloc'd memory */ + align_free(oldBuf->data); } oldBuf->data = NULL; @@ -327,10 +343,8 @@ xm_buffer_create(struct pipe_winsys *pws, buffer->base.usage = usage; buffer->base.size = size; - if (buffer->data == NULL) { - /* align to 16-byte multiple for Cell */ - buffer->data = align_malloc(size, max(alignment, 16)); - } + /* align to 16-byte multiple for Cell */ + buffer->data = align_malloc(size, max(alignment, 16)); return &buffer->base; } @@ -484,28 +498,9 @@ fail: } -static struct pipe_context * -xlib_create_softpipe_context( struct pipe_screen *screen, - void *context_private ) -{ - struct pipe_context *pipe; - - pipe = softpipe_create(screen); - if (pipe == NULL) - goto fail; - - pipe->priv = context_private; - return pipe; - -fail: - /* Free stuff here */ - return NULL; -} - struct xm_driver xlib_softpipe_driver = { .create_pipe_screen = xlib_create_softpipe_screen, - .create_pipe_context = xlib_create_softpipe_context, .display_surface = xlib_softpipe_display_surface }; |