diff options
Diffstat (limited to 'src/gallium')
434 files changed, 18634 insertions, 8475 deletions
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 63983c52201..136423513c6 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -54,13 +54,13 @@ install: ##### RULES ##### .c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ .cpp.o: - $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CXX) -c $(INCLUDES) $(DEFINES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ .S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ sinclude depend diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 8be84cddbe7..eea32b1314b 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -2,29 +2,7 @@ import os Import('*') -env = env.Clone() - -auxiliaries = [] - -Export('auxiliaries') - - -if llvm: - SConscript(['auxiliary/gallivm/SConscript']) - -SConscript([ - # NOTE: order matters! - 'auxiliary/util/SConscript', - 'auxiliary/rtasm/SConscript', - 'auxiliary/tgsi/SConscript', - 'auxiliary/cso_cache/SConscript', - 'auxiliary/translate/SConscript', - 'auxiliary/draw/SConscript', - 'auxiliary/pipebuffer/SConscript', - 'auxiliary/indices/SConscript', - 'auxiliary/rbug/SConscript', - 'auxiliary/vl/SConscript', -]) +SConscript('auxiliary/SConscript') for driver in env['drivers']: SConscript(os.path.join('drivers', driver, 'SConscript')) diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 5446eb68a98..e3af41c6e04 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -1,12 +1,177 @@ -# src/gallium/auxiliary/Makefile TOP = ../../.. include $(TOP)/configs/current -SUBDIRS = $(GALLIUM_AUXILIARY_DIRS) +LIBNAME = gallium -default install clean: - @for dir in $(SUBDIRS) ; do \ - if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE) $@) || exit 1; \ - fi \ - done +C_SOURCES = \ + cso_cache/cso_context.c \ + cso_cache/cso_cache.c \ + cso_cache/cso_hash.c \ + draw/draw_context.c \ + draw/draw_gs.c \ + draw/draw_pipe.c \ + draw/draw_pipe_aaline.c \ + draw/draw_pipe_aapoint.c \ + draw/draw_pipe_clip.c \ + draw/draw_pipe_cull.c \ + draw/draw_pipe_flatshade.c \ + draw/draw_pipe_offset.c \ + draw/draw_pipe_pstipple.c \ + draw/draw_pipe_stipple.c \ + draw/draw_pipe_twoside.c \ + draw/draw_pipe_unfilled.c \ + draw/draw_pipe_util.c \ + draw/draw_pipe_validate.c \ + draw/draw_pipe_vbuf.c \ + draw/draw_pipe_wide_line.c \ + draw/draw_pipe_wide_point.c \ + draw/draw_pt.c \ + draw/draw_pt_elts.c \ + draw/draw_pt_emit.c \ + draw/draw_pt_fetch.c \ + draw/draw_pt_fetch_emit.c \ + draw/draw_pt_fetch_shade_emit.c \ + draw/draw_pt_fetch_shade_pipeline.c \ + draw/draw_pt_post_vs.c \ + draw/draw_pt_util.c \ + draw/draw_pt_varray.c \ + draw/draw_pt_vcache.c \ + draw/draw_vertex.c \ + draw/draw_vs.c \ + draw/draw_vs_varient.c \ + draw/draw_vs_aos.c \ + draw/draw_vs_aos_io.c \ + draw/draw_vs_aos_machine.c \ + draw/draw_vs_exec.c \ + draw/draw_vs_llvm.c \ + draw/draw_vs_ppc.c \ + draw/draw_vs_sse.c \ + indices/u_indices_gen.c \ + indices/u_unfilled_gen.c \ + pipebuffer/pb_buffer_fenced.c \ + pipebuffer/pb_buffer_malloc.c \ + pipebuffer/pb_bufmgr_alt.c \ + pipebuffer/pb_bufmgr_cache.c \ + pipebuffer/pb_bufmgr_debug.c \ + pipebuffer/pb_bufmgr_fenced.c \ + pipebuffer/pb_bufmgr_mm.c \ + pipebuffer/pb_bufmgr_ondemand.c \ + pipebuffer/pb_bufmgr_pool.c \ + pipebuffer/pb_bufmgr_slab.c \ + pipebuffer/pb_validate.c \ + rbug/rbug_connection.c \ + rbug/rbug_core.c \ + rbug/rbug_texture.c \ + rbug/rbug_context.c \ + rbug/rbug_shader.c \ + rbug/rbug_demarshal.c \ + rtasm/rtasm_cpu.c \ + rtasm/rtasm_execmem.c \ + rtasm/rtasm_x86sse.c \ + rtasm/rtasm_ppc.c \ + rtasm/rtasm_ppc_spe.c \ + tgsi/tgsi_sanity.c \ + tgsi/tgsi_build.c \ + tgsi/tgsi_dump.c \ + tgsi/tgsi_exec.c \ + tgsi/tgsi_info.c \ + tgsi/tgsi_iterate.c \ + tgsi/tgsi_parse.c \ + tgsi/tgsi_ppc.c \ + tgsi/tgsi_scan.c \ + tgsi/tgsi_sse2.c \ + tgsi/tgsi_text.c \ + tgsi/tgsi_transform.c \ + tgsi/tgsi_ureg.c \ + tgsi/tgsi_util.c \ + translate/translate_generic.c \ + translate/translate_sse.c \ + translate/translate.c \ + translate/translate_cache.c \ + util/u_debug.c \ + util/u_debug_dump.c \ + util/u_debug_symbol.c \ + util/u_debug_stack.c \ + util/u_blit.c \ + util/u_blitter.c \ + util/u_cache.c \ + util/u_cpu_detect.c \ + util/u_dl.c \ + util/u_draw_quad.c \ + util/u_format.c \ + util/u_format_access.c \ + util/u_format_table.c \ + util/u_gen_mipmap.c \ + util/u_handle_table.c \ + util/u_hash_table.c \ + util/u_hash.c \ + util/u_keymap.c \ + util/u_linear.c \ + util/u_network.c \ + util/u_math.c \ + util/u_mm.c \ + util/u_rect.c \ + util/u_simple_shaders.c \ + util/u_snprintf.c \ + util/u_stream_stdc.c \ + util/u_stream_wd.c \ + util/u_surface.c \ + util/u_texture.c \ + util/u_tile.c \ + util/u_time.c \ + util/u_timed_winsys.c \ + util/u_upload_mgr.c \ + util/u_simple_screen.c \ + vl/vl_bitstream_parser.c \ + vl/vl_mpeg12_mc_renderer.c \ + vl/vl_compositor.c \ + vl/vl_csc.c \ + vl/vl_shader_build.c + +GALLIVM_SOURCES = \ + gallivm/gallivm.cpp \ + gallivm/gallivm_cpu.cpp \ + gallivm/instructions.cpp \ + gallivm/loweringpass.cpp \ + gallivm/tgsitollvm.cpp \ + gallivm/storage.cpp \ + gallivm/storagesoa.cpp \ + gallivm/instructionssoa.cpp + +INC_SOURCES = \ + gallivm/gallivm_builtins.cpp \ + gallivm/gallivmsoabuiltins.cpp + +# XXX: gallivm doesn't build correctly so disable for now +#ifeq ($(MESA_LLVM),1) +#DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +#CPP_SOURCES += \ +# $(GALLIVM_SOURCES) +#endif + + +include ../Makefile.template + + +gallivm/gallivm_builtins.cpp: gallivm/llvm_builtins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ + rm temp1.bin + +gallivm/gallivmsoabuiltins.cpp: gallivm/soabuiltins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ + rm temp2.bin + + +indices/u_indices_gen.c: indices/u_indices_gen.py + python $< > $@ + +indices/u_unfilled_gen.c: indices/u_unfilled_gen.py + python $< > $@ + +util/u_format_table.c: util/u_format_table.py util/u_format_parse.py util/u_format.csv + python util/u_format_table.py util/u_format.csv > $@ + +util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv + python util/u_format_access.py util/u_format.csv > $@ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript new file mode 100644 index 00000000000..782eb533863 --- /dev/null +++ b/src/gallium/auxiliary/SConscript @@ -0,0 +1,185 @@ +Import('*') + +from sys import executable as python_cmd + +env.Append(CPPPATH = [ + 'indices', + 'util', +]) + +env.CodeGenerate( + target = 'indices/u_indices_gen.c', + script = 'indices/u_indices_gen.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'indices/u_unfilled_gen.c', + script = 'indices/u_unfilled_gen.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'util/u_format_table.c', + script = 'util/u_format_table.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + +env.CodeGenerate( + target = 'util/u_format_access.c', + script = 'util/u_format_access.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + +source = [ + 'cso_cache/cso_context.c', + 'cso_cache/cso_cache.c', + 'cso_cache/cso_hash.c', + 'draw/draw_context.c', + 'draw/draw_pipe.c', + 'draw/draw_pipe_aaline.c', + 'draw/draw_pipe_aapoint.c', + 'draw/draw_pipe_clip.c', + 'draw/draw_pipe_cull.c', + 'draw/draw_pipe_flatshade.c', + 'draw/draw_pipe_offset.c', + 'draw/draw_pipe_pstipple.c', + 'draw/draw_pipe_stipple.c', + 'draw/draw_pipe_twoside.c', + 'draw/draw_pipe_unfilled.c', + 'draw/draw_pipe_util.c', + 'draw/draw_pipe_validate.c', + 'draw/draw_pipe_vbuf.c', + 'draw/draw_pipe_wide_line.c', + 'draw/draw_pipe_wide_point.c', + 'draw/draw_pt.c', + 'draw/draw_pt_elts.c', + 'draw/draw_pt_emit.c', + 'draw/draw_pt_fetch.c', + 'draw/draw_pt_fetch_emit.c', + 'draw/draw_pt_fetch_shade_emit.c', + 'draw/draw_pt_fetch_shade_pipeline.c', + 'draw/draw_pt_post_vs.c', + 'draw/draw_pt_util.c', + 'draw/draw_pt_varray.c', + 'draw/draw_pt_vcache.c', + 'draw/draw_vertex.c', + 'draw/draw_vs.c', + 'draw/draw_vs_aos.c', + 'draw/draw_vs_aos_io.c', + 'draw/draw_vs_aos_machine.c', + 'draw/draw_vs_exec.c', + 'draw/draw_vs_llvm.c', + 'draw/draw_vs_ppc.c', + 'draw/draw_vs_sse.c', + 'draw/draw_vs_varient.c', + 'draw/draw_gs.c', + #'indices/u_indices.c', + #'indices/u_unfilled_indices.c', + 'indices/u_indices_gen.c', + 'indices/u_unfilled_gen.c', + 'pipebuffer/pb_buffer_fenced.c', + 'pipebuffer/pb_buffer_malloc.c', + 'pipebuffer/pb_bufmgr_alt.c', + 'pipebuffer/pb_bufmgr_cache.c', + 'pipebuffer/pb_bufmgr_debug.c', + 'pipebuffer/pb_bufmgr_fenced.c', + 'pipebuffer/pb_bufmgr_mm.c', + 'pipebuffer/pb_bufmgr_ondemand.c', + 'pipebuffer/pb_bufmgr_pool.c', + 'pipebuffer/pb_bufmgr_slab.c', + 'pipebuffer/pb_validate.c', + 'rbug/rbug_core.c', + 'rbug/rbug_shader.c', + 'rbug/rbug_context.c', + 'rbug/rbug_texture.c', + 'rbug/rbug_demarshal.c', + 'rbug/rbug_connection.c', + 'rtasm/rtasm_cpu.c', + 'rtasm/rtasm_execmem.c', + 'rtasm/rtasm_x86sse.c', + 'rtasm/rtasm_ppc.c', + 'rtasm/rtasm_ppc_spe.c', + 'tgsi/tgsi_build.c', + 'tgsi/tgsi_dump.c', + 'tgsi/tgsi_dump_c.c', + 'tgsi/tgsi_exec.c', + 'tgsi/tgsi_info.c', + 'tgsi/tgsi_iterate.c', + 'tgsi/tgsi_parse.c', + 'tgsi/tgsi_sanity.c', + 'tgsi/tgsi_scan.c', + 'tgsi/tgsi_ppc.c', + 'tgsi/tgsi_sse2.c', + 'tgsi/tgsi_text.c', + 'tgsi/tgsi_transform.c', + 'tgsi/tgsi_ureg.c', + 'tgsi/tgsi_util.c', + 'translate/translate_generic.c', + 'translate/translate_sse.c', + 'translate/translate.c', + 'translate/translate_cache.c', + 'util/u_bitmask.c', + 'util/u_blit.c', + 'util/u_blitter.c', + 'util/u_cache.c', + 'util/u_cpu_detect.c', + 'util/u_debug.c', + 'util/u_debug_dump.c', + 'util/u_debug_memory.c', + 'util/u_debug_stack.c', + 'util/u_debug_symbol.c', + 'util/u_dl.c', + 'util/u_draw_quad.c', + 'util/u_format.c', + 'util/u_format_access.c', + 'util/u_format_table.c', + 'util/u_gen_mipmap.c', + 'util/u_handle_table.c', + 'util/u_hash.c', + 'util/u_hash_table.c', + 'util/u_keymap.c', + 'util/u_network.c', + 'util/u_math.c', + 'util/u_mm.c', + 'util/u_rect.c', + 'util/u_simple_shaders.c', + 'util/u_snprintf.c', + 'util/u_stream_stdc.c', + 'util/u_stream_wd.c', + 'util/u_surface.c', + 'util/u_texture.c', + 'util/u_tile.c', + 'util/u_time.c', + 'util/u_timed_winsys.c', + 'util/u_upload_mgr.c', + 'util/u_simple_screen.c', + 'vl/vl_bitstream_parser.c', + 'vl/vl_mpeg12_mc_renderer.c', + 'vl/vl_compositor.c', + 'vl/vl_csc.c', + 'vl/vl_shader_build.c', +] + +if env['llvm']: + source += [ + 'gallivm/gallivm.cpp', + 'gallivm/gallivm_cpu.cpp', + 'gallivm/instructions.cpp', + 'gallivm/loweringpass.cpp', + 'gallivm/tgsitollvm.cpp', + 'gallivm/storage.cpp', + 'gallivm/storagesoa.cpp', + 'gallivm/instructionssoa.cpp', + ] + +gallium = env.ConvenienceLibrary( + target = 'gallium', + source = source, +) + +Export('gallium') diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile deleted file mode 100644 index 8726afcd949..00000000000 --- a/src/gallium/auxiliary/cso_cache/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = cso_cache - -C_SOURCES = \ - cso_context.c \ - cso_cache.c \ - cso_hash.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript deleted file mode 100644 index 651e68a191a..00000000000 --- a/src/gallium/auxiliary/cso_cache/SConscript +++ /dev/null @@ -1,11 +0,0 @@ -Import('*') - -cso_cache = env.ConvenienceLibrary( - target = 'cso_cache', - source = [ - 'cso_context.c', - 'cso_cache.c', - 'cso_hash.c', - ]) - -auxiliaries.insert(0, cso_cache) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 80bd0c91db0..2b16332e143 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -42,6 +42,7 @@ #include "cso_cache/cso_context.h" #include "cso_cache/cso_cache.h" #include "cso_cache/cso_hash.h" +#include "cso_context.h" struct cso_context { struct pipe_context *pipe; @@ -85,8 +86,8 @@ struct cso_context { void *blend, *blend_saved; void *depth_stencil, *depth_stencil_saved; void *rasterizer, *rasterizer_saved; - void *fragment_shader, *fragment_shader_saved; - void *vertex_shader, *vertex_shader_saved; + void *fragment_shader, *fragment_shader_saved, *geometry_shader; + void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved; struct pipe_framebuffer_state fb, fb_saved; struct pipe_viewport_state vp, vp_saved; @@ -1027,3 +1028,38 @@ enum pipe_error cso_set_blend_color(struct cso_context *ctx, } return PIPE_OK; } + +enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx, + void *handle) +{ + if (ctx->geometry_shader != handle) { + ctx->geometry_shader = handle; + ctx->pipe->bind_gs_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_delete_geometry_shader(struct cso_context *ctx, void *handle) +{ + if (handle == ctx->geometry_shader) { + /* unbind before deleting */ + ctx->pipe->bind_gs_state(ctx->pipe, NULL); + ctx->geometry_shader = NULL; + } + ctx->pipe->delete_gs_state(ctx->pipe, handle); +} + +void cso_save_geometry_shader(struct cso_context *ctx) +{ + assert(!ctx->geometry_shader_saved); + ctx->geometry_shader_saved = ctx->geometry_shader; +} + +void cso_restore_geometry_shader(struct cso_context *ctx) +{ + if (ctx->geometry_shader_saved != ctx->geometry_shader) { + ctx->pipe->bind_gs_state(ctx->pipe, ctx->geometry_shader_saved); + ctx->geometry_shader = ctx->geometry_shader_saved; + } + ctx->geometry_shader_saved = NULL; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index e5b92177cfd..b9e313e32d6 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -146,6 +146,13 @@ void cso_save_vertex_shader(struct cso_context *cso); void cso_restore_vertex_shader(struct cso_context *cso); +enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx, + void *handle); +void cso_delete_geometry_shader(struct cso_context *ctx, void *handle); +void cso_save_geometry_shader(struct cso_context *cso); +void cso_restore_geometry_shader(struct cso_context *cso); + + enum pipe_error cso_set_framebuffer(struct cso_context *cso, const struct pipe_framebuffer_state *fb); diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile deleted file mode 100644 index 5041dcc072b..00000000000 --- a/src/gallium/auxiliary/draw/Makefile +++ /dev/null @@ -1,46 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = draw - -C_SOURCES = \ - draw_context.c \ - draw_pipe.c \ - draw_pipe_aaline.c \ - draw_pipe_aapoint.c \ - draw_pipe_clip.c \ - draw_pipe_cull.c \ - draw_pipe_flatshade.c \ - draw_pipe_offset.c \ - draw_pipe_pstipple.c \ - draw_pipe_stipple.c \ - draw_pipe_twoside.c \ - draw_pipe_unfilled.c \ - draw_pipe_util.c \ - draw_pipe_validate.c \ - draw_pipe_vbuf.c \ - draw_pipe_wide_line.c \ - draw_pipe_wide_point.c \ - draw_pt.c \ - draw_pt_elts.c \ - draw_pt_emit.c \ - draw_pt_fetch.c \ - draw_pt_fetch_emit.c \ - draw_pt_fetch_shade_emit.c \ - draw_pt_fetch_shade_pipeline.c \ - draw_pt_post_vs.c \ - draw_pt_util.c \ - draw_pt_varray.c \ - draw_pt_vcache.c \ - draw_vertex.c \ - draw_vs.c \ - draw_vs_varient.c \ - draw_vs_aos.c \ - draw_vs_aos_io.c \ - draw_vs_aos_machine.c \ - draw_vs_exec.c \ - draw_vs_llvm.c \ - draw_vs_ppc.c \ - draw_vs_sse.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript deleted file mode 100644 index 5f05aa324a5..00000000000 --- a/src/gallium/auxiliary/draw/SConscript +++ /dev/null @@ -1,46 +0,0 @@ -Import('*') - -draw = env.ConvenienceLibrary( - target = 'draw', - source = [ - 'draw_context.c', - 'draw_pipe.c', - 'draw_pipe_aaline.c', - 'draw_pipe_aapoint.c', - 'draw_pipe_clip.c', - 'draw_pipe_cull.c', - 'draw_pipe_flatshade.c', - 'draw_pipe_offset.c', - 'draw_pipe_pstipple.c', - 'draw_pipe_stipple.c', - 'draw_pipe_twoside.c', - 'draw_pipe_unfilled.c', - 'draw_pipe_util.c', - 'draw_pipe_validate.c', - 'draw_pipe_vbuf.c', - 'draw_pipe_wide_line.c', - 'draw_pipe_wide_point.c', - 'draw_pt.c', - 'draw_pt_elts.c', - 'draw_pt_emit.c', - 'draw_pt_fetch.c', - 'draw_pt_fetch_emit.c', - 'draw_pt_fetch_shade_emit.c', - 'draw_pt_fetch_shade_pipeline.c', - 'draw_pt_post_vs.c', - 'draw_pt_util.c', - 'draw_pt_varray.c', - 'draw_pt_vcache.c', - 'draw_vertex.c', - 'draw_vs.c', - 'draw_vs_aos.c', - 'draw_vs_aos_io.c', - 'draw_vs_aos_machine.c', - 'draw_vs_exec.c', - 'draw_vs_llvm.c', - 'draw_vs_ppc.c', - 'draw_vs_sse.c', - 'draw_vs_varient.c' - ]) - -auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index cc5f7f01059..667aa46b208 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -36,6 +36,7 @@ #include "draw_context.h" #include "draw_vbuf.h" #include "draw_vs.h" +#include "draw_gs.h" #include "draw_pt.h" #include "draw_pipe.h" @@ -67,6 +68,9 @@ struct draw_context *draw_create( void ) if (!draw_vs_init( draw )) goto fail; + if (!draw_gs_init( draw )) + goto fail; + return draw; fail: @@ -231,11 +235,19 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw, void draw_set_mapped_constant_buffer(struct draw_context *draw, - const void *buffer, + unsigned shader_type, + const void *buffer, unsigned size ) { - draw->pt.user.constants = buffer; - draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); + debug_assert(shader_type == PIPE_SHADER_VERTEX || + shader_type == PIPE_SHADER_GEOMETRY); + if (shader_type == PIPE_SHADER_VERTEX) { + draw->pt.user.vs_constants = buffer; + draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); + } else if (shader_type == PIPE_SHADER_GEOMETRY) { + draw->pt.user.gs_constants = buffer; + draw_gs_set_constants( draw, (const float (*)[4])buffer, size ); + } } @@ -298,7 +310,7 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable ) * a post-transformed vertex. * * With this function, drivers that use the draw module should have no reason - * to track the current vertex shader. + * to track the current vertex/geometry shader. * * Note that the draw module may sometimes generate vertices with extra * attributes (such as texcoords for AA lines). The driver can call this @@ -309,43 +321,59 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable ) * work for the drivers. */ int -draw_find_vs_output(const struct draw_context *draw, - uint semantic_name, uint semantic_index) +draw_find_shader_output(const struct draw_context *draw, + uint semantic_name, uint semantic_index) { const struct draw_vertex_shader *vs = draw->vs.vertex_shader; + const struct draw_geometry_shader *gs = draw->gs.geometry_shader; uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == semantic_name && - vs->info.output_semantic_index[i] == semantic_index) + const struct tgsi_shader_info *info = &vs->info; + + if (gs) + info = &gs->info; + + for (i = 0; i < info->num_outputs; i++) { + if (info->output_semantic_name[i] == semantic_name && + info->output_semantic_index[i] == semantic_index) return i; } /* XXX there may be more than one extra vertex attrib. * For example, simulated gl_FragCoord and gl_PointCoord. */ - if (draw->extra_vp_outputs.semantic_name == semantic_name && - draw->extra_vp_outputs.semantic_index == semantic_index) { - return draw->extra_vp_outputs.slot; + if (draw->extra_shader_outputs.semantic_name == semantic_name && + draw->extra_shader_outputs.semantic_index == semantic_index) { + return draw->extra_shader_outputs.slot; } + return 0; } /** - * Return number of vertex shader outputs. + * Return number of the shader outputs. + * + * If geometry shader is present, its output will be returned, + * if not vertex shader is used. */ uint -draw_num_vs_outputs(const struct draw_context *draw) +draw_num_shader_outputs(const struct draw_context *draw) { uint count = draw->vs.vertex_shader->info.num_outputs; - if (draw->extra_vp_outputs.slot > 0) + + /* if geometry shader is present, its outputs go to te + * driver, not the vertex shaders */ + if (draw->gs.geometry_shader) + count = draw->gs.geometry_shader->info.num_outputs; + + if (draw->extra_shader_outputs.slot > 0) count++; return count; } /** - * Provide TGSI sampler objects for vertex shaders that use texture fetches. + * Provide TGSI sampler objects for vertex/geometry shaders that use texture fetches. * This might only be used by software drivers for the time being. */ void @@ -355,6 +383,8 @@ draw_texture_samplers(struct draw_context *draw, { draw->vs.num_samplers = num_samplers; draw->vs.samplers = samplers; + draw->gs.num_samplers = num_samplers; + draw->gs.samplers = samplers; } @@ -421,3 +451,18 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) draw->flushing = FALSE; } } + + +int draw_current_shader_outputs(struct draw_context *draw) +{ + if (draw->gs.geometry_shader) + return draw->gs.num_gs_outputs; + return draw->vs.num_vs_outputs; +} + +int draw_current_shader_position_output(struct draw_context *draw) +{ + if (draw->gs.geometry_shader) + return draw->gs.position_output; + return draw->vs.position_output; +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 465b8f10c6c..8a64c06efcd 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -45,6 +45,7 @@ struct pipe_context; struct draw_context; struct draw_stage; struct draw_vertex_shader; +struct draw_geometry_shader; struct tgsi_sampler; @@ -85,11 +86,11 @@ draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe int -draw_find_vs_output(const struct draw_context *draw, - uint semantic_name, uint semantic_index); +draw_find_shader_output(const struct draw_context *draw, + uint semantic_name, uint semantic_index); uint -draw_num_vs_outputs(const struct draw_context *draw); +draw_num_shader_outputs(const struct draw_context *draw); void @@ -112,6 +113,17 @@ void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs); +/* + * Geometry shader functions + */ +struct draw_geometry_shader * +draw_create_geometry_shader(struct draw_context *draw, + const struct pipe_shader_state *shader); +void draw_bind_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dvs); +void draw_delete_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dvs); + /* * Vertex data functions @@ -140,6 +152,7 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); void draw_set_mapped_constant_buffer(struct draw_context *draw, + unsigned shader_type, const void *buffer, unsigned size ); @@ -151,6 +164,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count); +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void draw_flush(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c new file mode 100644 index 00000000000..5db2e755423 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -0,0 +1,338 @@ +/************************************************************************** + * + * Copyright 2009 VMWare Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "draw_gs.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_exec.h" + +#include "pipe/p_shader_tokens.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#define MAX_PRIM_VERTICES 6 +/* fixme: move it from here */ +#define MAX_PRIMITIVES 64 + +boolean +draw_gs_init( struct draw_context *draw ) +{ + draw->gs.machine = tgsi_exec_machine_create(); + if (!draw->gs.machine) + return FALSE; + + draw->gs.machine->Primitives = align_malloc( + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); + if (!draw->gs.machine->Primitives) + return FALSE; + memset(draw->gs.machine->Primitives, 0, + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); + + return TRUE; +} + + +void draw_gs_set_constants( struct draw_context *draw, + const float (*constants)[4], + unsigned size ) +{ +} + + +struct draw_geometry_shader * +draw_create_geometry_shader(struct draw_context *draw, + const struct pipe_shader_state *state) +{ + struct draw_geometry_shader *gs; + int i; + + gs = CALLOC_STRUCT(draw_geometry_shader); + + if (!gs) + return NULL; + + gs->state = *state; + gs->state.tokens = tgsi_dup_tokens(state->tokens); + if (!gs->state.tokens) { + FREE(gs); + return NULL; + } + + tgsi_scan_shader(state->tokens, &gs->info); + + /* setup the defaults */ + gs->input_primitive = PIPE_PRIM_TRIANGLES; + gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP; + gs->max_output_vertices = 32; + + for (i = 0; i < gs->info.num_properties; ++i) { + if (gs->info.properties[i].name == + TGSI_PROPERTY_GS_INPUT_PRIM) + gs->input_primitive = gs->info.properties[i].data[0]; + else if (gs->info.properties[i].name == + TGSI_PROPERTY_GS_OUTPUT_PRIM) + gs->output_primitive = gs->info.properties[i].data[0]; + else if (gs->info.properties[i].name == + TGSI_PROPERTY_GS_MAX_VERTICES) + gs->max_output_vertices = gs->info.properties[i].data[0]; + } + + gs->machine = draw->gs.machine; + + if (gs) + { + uint i; + for (i = 0; i < gs->info.num_outputs; i++) { + if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && + gs->info.output_semantic_index[i] == 0) + gs->position_output = i; + } + } + + return gs; +} + +void draw_bind_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dgs) +{ + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); + + if (dgs) { + draw->gs.geometry_shader = dgs; + draw->gs.num_gs_outputs = dgs->info.num_outputs; + draw->gs.position_output = dgs->position_output; + draw_geometry_shader_prepare(dgs, draw); + } + else { + draw->gs.geometry_shader = NULL; + draw->gs.num_gs_outputs = 0; + } +} + +void draw_delete_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dgs) +{ + FREE(dgs); +} + +static INLINE int num_vertices_for_prim(int prim) +{ + switch(prim) { + case PIPE_PRIM_POINTS: + return 1; + case PIPE_PRIM_LINES: + return 2; + case PIPE_PRIM_LINE_LOOP: + return 2; + case PIPE_PRIM_LINE_STRIP: + return 2; + case PIPE_PRIM_TRIANGLES: + return 3; + case PIPE_PRIM_TRIANGLE_STRIP: + return 3; + case PIPE_PRIM_TRIANGLE_FAN: + return 3; + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return 4; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return 6; + default: + assert(!"Bad geometry shader input"); + return 0; + } +} + +static void draw_fetch_geometry_input(struct draw_geometry_shader *shader, + int start_primitive, + int num_primitives, + const float (*input_ptr)[4], + unsigned input_vertex_stride, + unsigned inputs_from_vs) +{ + struct tgsi_exec_machine *machine = shader->machine; + unsigned slot, vs_slot, k, j; + unsigned num_vertices = num_vertices_for_prim(shader->input_primitive); + int idx = 0; + + for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; slot++) { + /*debug_printf("Slot = %d (semantic = %d)\n", slot, + shader->info.input_semantic_name[slot]);*/ + if (shader->info.input_semantic_name[slot] == + TGSI_SEMANTIC_PRIMID) { + for (j = 0; j < num_primitives; ++j) { + machine->Inputs[idx].xyzw[0].f[j] = (float)start_primitive + j; + machine->Inputs[idx].xyzw[1].f[j] = (float)start_primitive + j; + machine->Inputs[idx].xyzw[2].f[j] = (float)start_primitive + j; + machine->Inputs[idx].xyzw[3].f[j] = (float)start_primitive + j; + } + ++idx; + } else { + for (j = 0; j < num_primitives; ++j) { + int vidx = idx; + const float (*prim_ptr)[4]; + /*debug_printf(" %d) Prim (num_verts = %d)\n", start_primitive + j, + num_vertices);*/ + prim_ptr = (const float (*)[4])( + (const char *)input_ptr + + (j * num_vertices * input_vertex_stride)); + + for (k = 0; k < num_vertices; ++k, ++vidx) { + const float (*input)[4]; + input = (const float (*)[4])( + (const char *)prim_ptr + (k * input_vertex_stride)); + vidx = k * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; + /*debug_printf("\t%d)(%d) Input vert:\n", vidx, k);*/ +#if 1 + assert(!util_is_inf_or_nan(input[vs_slot][0])); + assert(!util_is_inf_or_nan(input[vs_slot][1])); + assert(!util_is_inf_or_nan(input[vs_slot][2])); + assert(!util_is_inf_or_nan(input[vs_slot][3])); +#endif + machine->Inputs[vidx].xyzw[0].f[j] = input[vs_slot][0]; + machine->Inputs[vidx].xyzw[1].f[j] = input[vs_slot][1]; + machine->Inputs[vidx].xyzw[2].f[j] = input[vs_slot][2]; + machine->Inputs[vidx].xyzw[3].f[j] = input[vs_slot][3]; +#if 0 + debug_printf("\t\t%d %f %f %f %f\n", slot, + machine->Inputs[vidx].xyzw[0].f[j], + machine->Inputs[vidx].xyzw[1].f[j], + machine->Inputs[vidx].xyzw[2].f[j], + machine->Inputs[vidx].xyzw[3].f[j]); +#endif + } + } + ++vs_slot; + idx += num_vertices; + } + } +} + +static INLINE void +draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, + int num_primitives, + float (*output)[4], + unsigned vertex_size) +{ + struct tgsi_exec_machine *machine = shader->machine; + unsigned prim_idx, j, slot; + + /* Unswizzle all output results. + */ + /* FIXME: handle all the primitives produced by the gs, not just + * the first one + unsigned prim_count = + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];*/ + for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { + unsigned num_verts_per_prim = machine->Primitives[0]; + for (j = 0; j < num_verts_per_prim; j++) { + int idx = (prim_idx * num_verts_per_prim + j) * + shader->info.num_outputs; +#ifdef DEBUG_OUTPUTS + debug_printf("%d) Output vert:\n", idx); +#endif + for (slot = 0; slot < shader->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[prim_idx]; + output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[prim_idx]; + output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[prim_idx]; + output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[prim_idx]; +#ifdef DEBUG_OUTPUTS + debug_printf("\t%d: %f %f %f %f\n", slot, + output[slot][0], + output[slot][1], + output[slot][2], + output[slot][3]); +#endif + debug_assert(!util_is_inf_or_nan(output[slot][0])); + } + output = (float (*)[4])((char *)output + vertex_size); + } + } +} + +void draw_geometry_shader_run(struct draw_geometry_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned vertex_size) +{ + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + unsigned num_vertices = num_vertices_for_prim(shader->input_primitive); + unsigned num_primitives = count/num_vertices; + unsigned inputs_from_vs = 0; + + machine->Consts = constants; + + for (i = 0; i < shader->info.num_inputs; ++i) { + if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID) + ++inputs_from_vs; + } + + for (i = 0; i < num_primitives; ++i) { + unsigned int max_primitives = 1; + + draw_fetch_geometry_input(shader, i, max_primitives, input, + input_stride, inputs_from_vs); + + tgsi_set_exec_mask(machine, + 1, + max_primitives > 1, + max_primitives > 2, + max_primitives > 3); + + /* run interpreter */ + tgsi_exec_machine_run(machine); + + draw_geometry_fetch_outputs(shader, max_primitives, + output, vertex_size); + } +} + +void draw_geometry_shader_delete(struct draw_geometry_shader *shader) +{ + FREE((void*) shader->state.tokens); + FREE(shader); +} + +void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, + struct draw_context *draw) +{ + if (shader->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(shader->machine, + shader->state.tokens, + draw->gs.num_samplers, + draw->gs.samplers); + } +} diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h new file mode 100644 index 00000000000..d6a97d9c4ef --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2009 VMWare Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DRAW_GS_H +#define DRAW_GS_H + +#include "draw_context.h" +#include "draw_private.h" + + +#define MAX_TGSI_PRIMITIVES 4 + +struct draw_context; + +/** + * Private version of the compiled geometry shader + */ +struct draw_geometry_shader { + struct draw_context *draw; + + struct tgsi_exec_machine *machine; + + /* This member will disappear shortly:*/ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + unsigned position_output; + + unsigned max_output_vertices; + unsigned input_primitive; + unsigned output_primitive; + + /* Extracted from shader: + */ + const float (*immediates)[4]; +}; + +void draw_geometry_shader_run(struct draw_geometry_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride); + +void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, + struct draw_context *draw); + +void draw_geometry_shader_delete(struct draw_geometry_shader *shader); + + +#endif diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 14375426ed8..4585dcdb48a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -660,13 +660,13 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) } /* update vertex attrib info */ - aaline->tex_slot = draw->vs.num_vs_outputs; - aaline->pos_slot = draw->vs.position_output; + aaline->tex_slot = draw_current_shader_outputs(draw); + aaline->pos_slot = draw_current_shader_position_output(draw);; /* advertise the extra post-transformed vertex attribute */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib; - draw->extra_vp_outputs.slot = aaline->tex_slot; + draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib; + draw->extra_shader_outputs.slot = aaline->tex_slot; /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ @@ -707,7 +707,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags) aaline->state.texture); draw->suspend_flushing = FALSE; - draw->extra_vp_outputs.slot = 0; + draw->extra_shader_outputs.slot = 0; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 75130a8fb0e..d86717e5182 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -687,14 +687,14 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) bind_aapoint_fragment_shader(aapoint); /* update vertex attrib info */ - aapoint->tex_slot = draw->vs.num_vs_outputs; + aapoint->tex_slot = draw_current_shader_outputs(draw); assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ - aapoint->pos_slot = draw->vs.position_output; + aapoint->pos_slot = draw_current_shader_position_output(draw); - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib; - draw->extra_vp_outputs.slot = aapoint->tex_slot; + draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib; + draw->extra_shader_outputs.slot = aapoint->tex_slot; /* find psize slot in post-transform vertex */ aapoint->psize_slot = -1; @@ -731,7 +731,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags) aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); draw->suspend_flushing = FALSE; - draw->extra_vp_outputs.slot = 0; + draw->extra_shader_outputs.slot = 0; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 0670268a196..51a6115ebf5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -55,7 +55,7 @@ -struct clipper { +struct clip_stage { struct draw_stage stage; /**< base class */ /* Basically duplicate some of the flatshading logic here: @@ -70,9 +70,9 @@ struct clipper { /* This is a bit confusing: */ -static INLINE struct clipper *clipper_stage( struct draw_stage *stage ) +static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) { - return (struct clipper *)stage; + return (struct clip_stage *)stage; } @@ -92,11 +92,12 @@ static void interp_attr( float *fdst, fdst[3] = LINTERP( t, fout[3], fin[3] ); } + static void copy_colors( struct draw_stage *stage, struct vertex_header *dst, const struct vertex_header *src ) { - const struct clipper *clipper = clipper_stage(stage); + const struct clip_stage *clipper = clip_stage(stage); uint i; for (i = 0; i < clipper->num_color_attribs; i++) { const uint attr = clipper->color_attribs[i]; @@ -108,14 +109,14 @@ static void copy_colors( struct draw_stage *stage, /* Interpolate between two vertices to produce a third. */ -static void interp( const struct clipper *clip, +static void interp( const struct clip_stage *clip, struct vertex_header *dst, float t, const struct vertex_header *out, const struct vertex_header *in ) { - const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs; - const unsigned pos_attr = clip->stage.draw->vs.position_output; + const unsigned nr_attrs = draw_current_shader_outputs(clip->stage.draw); + const unsigned pos_attr = draw_current_shader_position_output(clip->stage.draw); unsigned j; /* Vertex header. @@ -179,7 +180,7 @@ static void emit_poly( struct draw_stage *stage, header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ if (i == n-1) - header.flags |= edge_last; + header.flags |= edge_last; if (0) { const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; @@ -200,13 +201,14 @@ static void emit_poly( struct draw_stage *stage, } } + static INLINE float dot4(const float *a, const float *b) { - return (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); + return (a[0] * b[0] + + a[1] * b[1] + + a[2] * b[2] + + a[3] * b[3]); } @@ -217,7 +219,7 @@ do_clip_tri( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *a[MAX_CLIPPED_VERTICES]; struct vertex_header *b[MAX_CLIPPED_VERTICES]; struct vertex_header **inlist = a; @@ -280,6 +282,7 @@ do_clip_tri( struct draw_stage *stage, dp_prev = dp; } + /* swap in/out lists */ { struct vertex_header **tmp = inlist; inlist = outlist; @@ -291,15 +294,11 @@ do_clip_tri( struct draw_stage *stage, /* If flat-shading, copy color to new provoking vertex. */ if (clipper->flat && inlist[0] != header->v[2]) { - if (1) { - inlist[0] = dup_vert(stage, inlist[0], tmpnr++); - } + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); copy_colors(stage, inlist[0], header->v[2]); } - - /* Emit the polygon as triangles to the setup stage: */ if (n >= 3) @@ -314,7 +313,7 @@ do_clip_line( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - const struct clipper *clipper = clipper_stage( stage ); + const struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; const float *pos0 = v0->clip; @@ -416,13 +415,14 @@ clip_tri( struct draw_stage *stage, } } + /* Update state. Could further delay this until we hit the first * primitive that really requires clipping. */ static void clip_init_state( struct draw_stage *stage ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; @@ -488,7 +488,7 @@ static void clip_destroy( struct draw_stage *stage ) */ struct draw_stage *draw_clip_stage( struct draw_context *draw ) { - struct clipper *clipper = CALLOC_STRUCT(clipper); + struct clip_stage *clipper = CALLOC_STRUCT(clip_stage); if (clipper == NULL) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 0a70483858c..11b39db5990 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -55,7 +55,7 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) static void cull_tri( struct draw_stage *stage, struct prim_header *header ) { - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); /* Window coords: */ const float *v0 = header->v[0]->data[pos]; diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 40798a5d6e7..e829492423e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -63,7 +63,7 @@ static INLINE struct offset_stage *offset_stage( struct draw_stage *stage ) static void do_offset_tri( struct draw_stage *stage, struct prim_header *header ) { - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); struct offset_stage *offset = offset_stage(stage); float inv_det = 1.0f / header->det; diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 6e921bac278..70fbab9ea76 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -73,7 +73,8 @@ screen_interp( struct draw_context *draw, const struct vertex_header *v1 ) { uint attr; - for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) { + int num_outputs = draw_current_shader_outputs(draw); + for (attr = 0; attr < num_outputs; attr++) { const float *val0 = v0->data[attr]; const float *val1 = v1->data[attr]; float *newv = dst->data[attr]; @@ -121,7 +122,7 @@ stipple_line(struct draw_stage *stage, struct prim_header *header) struct stipple_stage *stipple = stipple_stage(stage); struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); const float *pos0 = v0->data[pos]; const float *pos1 = v1->data[pos]; float start = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 1a5269c0de9..d40c0352401 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf, /* Note: we really do want data[0] here, not data[pos]: */ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); - vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr); if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); @@ -271,10 +271,12 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index f32cbef983d..3073c870825 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -59,7 +59,7 @@ static void wideline_line( struct draw_stage *stage, struct prim_header *header ) { /*const struct wideline_stage *wide = wideline_stage(stage);*/ - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); const float half_width = 0.5f * stage->draw->rasterizer->line_width; struct prim_header tri; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 7d76a7dbf39..8dc50c0ab43 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -112,7 +112,7 @@ static void set_texcoords(const struct widepoint_stage *wide, if (wide->point_coord_fs_input >= 0) { /* put gl_PointCoord into the extra vertex slot */ - uint slot = wide->stage.draw->extra_vp_outputs.slot; + uint slot = wide->stage.draw->extra_shader_outputs.slot; v->data[slot][0] = tc[0]; v->data[slot][1] = tc[1]; v->data[slot][2] = 0.0F; @@ -130,7 +130,7 @@ static void widepoint_point( struct draw_stage *stage, struct prim_header *header ) { const struct widepoint_stage *wide = widepoint_stage(stage); - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; float half_size; float left_adj, right_adj, bot_adj, top_adj; @@ -257,13 +257,13 @@ static void widepoint_first_point( struct draw_stage *stage, wide->point_coord_fs_input = find_pntc_input_attrib(draw); /* setup extra vp output (point coord implemented as a texcoord) */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = 0; - draw->extra_vp_outputs.slot = draw->vs.num_vs_outputs; + draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_shader_outputs.semantic_index = 0; + draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw); } else { wide->point_coord_fs_input = -1; - draw->extra_vp_outputs.slot = 0; + draw->extra_shader_outputs.slot = 0; } wide->psize_slot = -1; @@ -287,7 +287,7 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags ) { stage->point = widepoint_first_point; stage->next->flush( stage->next, flags ); - stage->draw->extra_vp_outputs.slot = 0; + stage->draw->extra_shader_outputs.slot = 0; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 3850cede1e8..d5e1c0ce2a2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -152,8 +152,9 @@ struct draw_context /** vertex arrays */ const void *vbuffer[PIPE_MAX_ATTRIBS]; - /** constant buffer (for vertex shader) */ - const void *constants; + /** constant buffer (for vertex/geometry shader) */ + const void *vs_constants; + const void *gs_constants; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ @@ -171,6 +172,8 @@ struct draw_context boolean force_passthrough; /**< never clip or shade */ + boolean dump_vs; + double mrd; /**< minimum resolvable depth value, for polygon offset */ /* pipe state that we need: */ @@ -211,6 +214,18 @@ struct draw_context struct translate_cache *emit_cache; } vs; + struct { + struct draw_geometry_shader *geometry_shader; + uint num_gs_outputs; /**< convenience, from geometry_shader */ + uint position_output; + + /** TGSI program interpreter runtime state */ + struct tgsi_exec_machine *machine; + + uint num_samplers; + struct tgsi_sampler **samplers; + } gs; + /* Clip derived state: */ float plane[12][4]; @@ -222,10 +237,12 @@ struct draw_context uint semantic_name; uint semantic_index; int slot; - } extra_vp_outputs; + } extra_shader_outputs; unsigned reduced_prim; + unsigned instance_id; + void *driver_private; }; @@ -245,6 +262,19 @@ void draw_vs_set_constants( struct draw_context *, +/******************************************************************************* + * Geometry shading code: + */ +boolean draw_gs_init( struct draw_context *draw ); +void draw_gs_set_constants( struct draw_context *, + const float (*constants)[4], + unsigned size ); + +/******************************************************************************* + * Common shading code: + */ +int draw_current_shader_outputs(struct draw_context *draw); +int draw_current_shader_position_output(struct draw_context *draw); /******************************************************************************* * Vertex processing (was passthrough) code: diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 2801dbafe47..a5ddec52863 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -280,20 +280,33 @@ void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - unsigned reduced_prim = u_reduced_prim(prim); + draw_arrays_instanced(draw, prim, start, count, 0, 1); +} + +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + unsigned reduced_prim = u_reduced_prim(mode); + unsigned instance; + if (reduced_prim != draw->reduced_prim) { - draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); draw->reduced_prim = reduced_prim; } if (0) - draw_print_arrays(draw, prim, start, MIN2(count, 20)); + draw_print_arrays(draw, mode, start, MIN2(count, 20)); #if 0 { int i; - debug_printf("draw_arrays(prim=%u start=%u count=%u):\n", - prim, start, count); + debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", + mode, start, count); tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { @@ -311,6 +324,8 @@ draw_arrays(struct draw_context *draw, unsigned prim, } #endif - /* drawing done here: */ - draw_pt_arrays(draw, prim, start, count); + for (instance = 0; instance < instanceCount; instance++) { + draw->instance_id = instance + startInstance; + draw_pt_arrays(draw, mode, start, count); + } } diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 20edf7a227e..d5e0d92a605 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); struct pt_fetch; void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vertex_input_count, - unsigned vertex_size ); + unsigned vertex_size, + unsigned instance_id_index ); void draw_pt_fetch_run( struct pt_fetch *fetch, const unsigned *elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 064e16c295c..4fb53276bbe 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -121,10 +121,12 @@ void draw_pt_emit_prepare( struct pt_emit *emit, emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; @@ -204,6 +206,7 @@ void draw_pt_emit( struct pt_emit *emit, translate->run( translate, 0, vertex_count, + draw->instance_id, hw_verts ); render->unmap_vertices( render, @@ -263,6 +266,7 @@ void draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, count, + draw->instance_id, hw_verts); if (0) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 305bfef4352..55e7a7b81ad 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -58,12 +58,14 @@ struct pt_fetch { */ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vs_input_count, - unsigned vertex_size ) + unsigned vertex_size, + unsigned instance_id_index ) { struct draw_context *draw = fetch->draw; unsigned nr_inputs; - unsigned i, nr = 0; + unsigned i, nr = 0, ei = 0; unsigned dst_offset = 0; + unsigned num_extra_inputs = 0; struct translate_key key; fetch->vertex_size = vertex_size; @@ -78,9 +80,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, { /* Need to set header->vertex_id = 0xffff somehow. */ + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; + key.element[nr].instance_divisor = 0; key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].output_offset = dst_offset; dst_offset += 1 * sizeof(float); @@ -91,19 +95,36 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, */ dst_offset += 4 * sizeof(float); } - - assert( draw->pt.nr_vertex_elements >= vs_input_count ); - nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements ); + if (instance_id_index != ~0) { + num_extra_inputs++; + } + + assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count); + + nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs); for (i = 0; i < nr_inputs; i++) { - key.element[nr].input_format = draw->pt.vertex_element[i].src_format; - key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; - key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; - key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - key.element[nr].output_offset = dst_offset; + if (i == instance_id_index) { + key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID; + key.element[nr].input_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_offset = dst_offset; + + dst_offset += sizeof(uint); + } else { + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; + key.element[nr].input_format = draw->pt.vertex_element[ei].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset; + key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor; + key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[nr].output_offset = dst_offset; + + ei++; + dst_offset += 4 * sizeof(float); + } - dst_offset += 4 * sizeof(float); nr++; } @@ -158,6 +179,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, translate->run_elts( translate, elts, count, + draw->instance_id, verts ); } @@ -183,6 +205,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, translate->run( translate, start, count, + draw->instance_id, verts ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e7fe6b3b768..2a604470e9a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -166,9 +166,11 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, continue; } + key.element[i].type = TRANSLATE_ELEMENT_NORMAL; key.element[i].input_format = input_format; key.element[i].input_buffer = input_buffer; key.element[i].input_offset = input_offset; + key.element[i].instance_divisor = src->instance_divisor; key.element[i].output_format = output_format; key.element[i].output_offset = dst_offset; @@ -256,6 +258,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, feme->translate->run_elts( feme->translate, fetch_elts, fetch_count, + draw->instance_id, hw_verts ); if (0) { @@ -314,6 +317,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); if (0) { @@ -374,6 +378,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 932113783d0..279f4eec635 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -32,6 +32,7 @@ #include "draw/draw_vertex.h" #include "draw/draw_pt.h" #include "draw/draw_vs.h" +#include "draw/draw_gs.h" #include "translate/translate.h" @@ -58,6 +59,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vs.vertex_shader; + unsigned i; + boolean instance_id_index = ~0; /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. @@ -65,6 +68,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned nr = MAX2( vs->info.num_inputs, vs->info.num_outputs + 1 ); + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + fpme->prim = prim; fpme->opt = opt; @@ -78,7 +90,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, draw_pt_fetch_prepare( fpme->fetch, vs->info.num_inputs, - fpme->vertex_size ); + fpme->vertex_size, + instance_id_index ); /* XXX: it's not really gl rasterization rules we care about here, * but gl vs dx9 clip spaces. */ @@ -119,7 +132,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( fetch_count, 4 ); @@ -147,13 +161,21 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, */ if (opt & PT_SHADE) { - shader->run_linear(shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); + vshader->run_linear(vshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.vs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + if (gshader) + draw_geometry_shader_run(gshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.gs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, @@ -196,6 +218,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( count, 4 ); @@ -226,10 +249,19 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.constants, + (const float (*)[4])draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); + + if (geometry_shader) + draw_geometry_shader_run(geometry_shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.gs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, @@ -270,6 +302,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( count, 4 ); @@ -296,10 +329,19 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.constants, + (const float (*)[4])draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); + + if (geometry_shader) + draw_geometry_shader_run(geometry_shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.gs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 9dfb47837e0..55151823a14 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -100,7 +100,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; - const unsigned pos = pvs->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned clipped = 0; unsigned j; @@ -190,7 +190,7 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; - const unsigned pos = pvs->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned j; if (0) debug_printf("%s\n", __FUNCTION__); diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index b61fa291436..17c3b8cec26 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -50,16 +50,32 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) *first = 2; *incr = 1; break; + case PIPE_PRIM_LINES_ADJACENCY: + *first = 4; + *incr = 2; + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + *first = 4; + *incr = 1; + break; case PIPE_PRIM_TRIANGLES: *first = 3; *incr = 3; break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + *first = 6; + *incr = 3; + break; case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_TRIANGLE_FAN: case PIPE_PRIM_POLYGON: *first = 3; *incr = 1; break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + *first = 6; + *incr = 1; + break; case PIPE_PRIM_QUADS: *first = 4; *incr = 4; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index 010c7a18a7c..f0aec5febab 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -36,6 +36,10 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_QUADS: case PIPE_PRIM_QUAD_STRIP: + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: for (j = 0; j < count;) { unsigned remaining = count - j; unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr ); diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 35536895326..60b4dd97042 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -83,6 +83,10 @@ draw_create_vertex_shader(struct draw_context *draw, { struct draw_vertex_shader *vs; + if (draw->dump_vs) { + tgsi_dump(shader->tokens, 0); + } + vs = draw_create_vs_llvm( draw, shader ); if (!vs) { vs = draw_create_vs_sse( draw, shader ); @@ -152,6 +156,8 @@ draw_delete_vertex_shader(struct draw_context *draw, boolean draw_vs_init( struct draw_context *draw ) { + draw->dump_vs = debug_get_bool_option("GALLIUM_DUMP_VS", FALSE); + draw->vs.machine = tgsi_exec_machine_create(); if (!draw->vs.machine) return FALSE; diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index e3b807ebd0e..00036cfe68b 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -43,6 +43,7 @@ struct draw_varient_input enum pipe_format format; unsigned buffer; unsigned offset; + unsigned instance_divisor; }; struct draw_varient_output diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index ad184bd696d..da9f3e3d35c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* loop over verts */ for (i = 0; i < count; i += MAX_VERTICES) { const uint max_vertices = MIN2(MAX_VERTICES, count - i); - float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; - float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; - float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]; + PIPE_ALIGN_VAR(16) float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]; + PIPE_ALIGN_VAR(16) float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]; uint attr; /* convert (up to) four input verts to SoA format */ diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 7ee567d4789..9f40030f39f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -142,16 +142,18 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->fetch->run_elts( vsvg->fetch, elts, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); + /* FIXME: geometry shading? */ if (vsvg->base.key.clip) { /* not really handling clipping, just do the rhw so we can @@ -180,6 +182,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -202,12 +205,13 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->fetch->run( vsvg->fetch, start, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); @@ -238,6 +242,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -280,9 +285,11 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, fetch.nr_elements = key->nr_inputs; fetch.output_stride = vsvg->temp_vertex_stride; for (i = 0; i < key->nr_inputs; i++) { + fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL; fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; + fetch.element[i].instance_divisor = 0; fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; fetch.element[i].output_offset = i * 4 * sizeof(float); assert(fetch.element[i].output_offset < fetch.output_stride); @@ -294,17 +301,21 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, for (i = 0; i < key->nr_outputs; i++) { if (key->element[i].out.format != EMIT_1F_PSIZE) { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); + emit.element[i].instance_divisor = 0; emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); emit.element[i].output_offset = key->element[i].out.offset; assert(emit.element[i].input_offset <= fetch.output_stride); } else { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].input_buffer = 1; emit.element[i].input_offset = 0; + emit.element[i].instance_divisor = 0; emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].output_offset = key->element[i].out.offset; } diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile deleted file mode 100644 index 5a96d94ec37..00000000000 --- a/src/gallium/auxiliary/gallivm/Makefile +++ /dev/null @@ -1,92 +0,0 @@ -# -*-makefile-*- -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = gallivm - - -GALLIVM_SOURCES = \ - gallivm.cpp \ - gallivm_cpu.cpp \ - instructions.cpp \ - loweringpass.cpp \ - tgsitollvm.cpp \ - storage.cpp \ - storagesoa.cpp \ - instructionssoa.cpp - -INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp - -CPP_SOURCES = \ - $(GALLIVM_SOURCES) - -C_SOURCES = -ASM_SOURCES = - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(CPP_SOURCES:.cpp=.o) \ - $(ASM_SOURCES:.S=.o) - -### Include directories -INCLUDES = \ - -I. \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.cpp.o: - $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -##### TARGETS ##### - -default:: depend symlinks $(LIBNAME) - - -$(LIBNAME): $(OBJECTS) Makefile - $(TOP)/bin/mklib -o $@ -static $(OBJECTS) - - -depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ - $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null - - -gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin - (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp1.bin - -gallivmsoabuiltins.cpp: soabuiltins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin - (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp2.bin - -# Emacs tags -tags: - etags `find . -name \*.[ch]` `find ../include` - - -# Remove .o and backup files -clean: - -rm -f *.o */*.o *~ *.so *~ server/*.o - -rm -f depend depend.bak - -rm -f gallivm_builtins.cpp - -rm -f gallivmsoabuiltins.cpp - -symlinks: - - -include depend diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript deleted file mode 100644 index c0aa51b90a9..00000000000 --- a/src/gallium/auxiliary/gallivm/SConscript +++ /dev/null @@ -1,16 +0,0 @@ -Import('*') - -gallivm = env.ConvenienceLibrary( - target = 'gallivm', - source = [ - 'gallivm.cpp', - 'gallivm_cpu.cpp', - 'instructions.cpp', - 'loweringpass.cpp', - 'tgsitollvm.cpp', - 'storage.cpp', - 'storagesoa.cpp', - 'instructionssoa.cpp', - ]) - -auxiliaries.insert(0, gallivm) diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 5cafe8c3f0c..8f7d3b71004 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -552,7 +552,7 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_SHL: break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: break; case TGSI_OPCODE_AND: break; @@ -919,7 +919,7 @@ translate_instructionir(llvm::Module *module, break; case TGSI_OPCODE_SHL: break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: break; case TGSI_OPCODE_AND: break; diff --git a/src/gallium/auxiliary/indices/Makefile b/src/gallium/auxiliary/indices/Makefile deleted file mode 100644 index f2ebc3f410a..00000000000 --- a/src/gallium/auxiliary/indices/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = indices - -C_SOURCES = \ - u_indices_gen.c \ - u_unfilled_gen.c - -include ../../Makefile.template - -u_indices_gen.c: u_indices_gen.py - python $< > $@ - -u_unfilled_gen.c: u_unfilled_gen.py - python $< > $@ diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript deleted file mode 100644 index 712e215534f..00000000000 --- a/src/gallium/auxiliary/indices/SConscript +++ /dev/null @@ -1,28 +0,0 @@ -Import('*') - -from sys import executable as python_cmd - -env.CodeGenerate( - target = 'u_indices_gen.c', - script = 'u_indices_gen.py', - source = [], - command = python_cmd + ' $SCRIPT > $TARGET' -) - -env.CodeGenerate( - target = 'u_unfilled_gen.c', - script = 'u_unfilled_gen.py', - source = [], - command = python_cmd + ' $SCRIPT > $TARGET' -) - -indices = env.ConvenienceLibrary( - target = 'indices', - source = [ -# 'u_indices.c', -# 'u_unfilled_indices.c', - 'u_indices_gen.c', - 'u_unfilled_gen.c', - ]) - -auxiliaries.insert(0, indices) diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile deleted file mode 100644 index 1c00ba8d986..00000000000 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = pipebuffer - -C_SOURCES = \ - pb_buffer_fenced.c \ - pb_buffer_malloc.c \ - pb_bufmgr_alt.c \ - pb_bufmgr_cache.c \ - pb_bufmgr_debug.c \ - pb_bufmgr_fenced.c \ - pb_bufmgr_mm.c \ - pb_bufmgr_ondemand.c \ - pb_bufmgr_pool.c \ - pb_bufmgr_slab.c \ - pb_validate.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript deleted file mode 100644 index 8e9f06abe45..00000000000 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ /dev/null @@ -1,19 +0,0 @@ -Import('*') - -pipebuffer = env.ConvenienceLibrary( - target = 'pipebuffer', - source = [ - 'pb_buffer_fenced.c', - 'pb_buffer_malloc.c', - 'pb_bufmgr_alt.c', - 'pb_bufmgr_cache.c', - 'pb_bufmgr_debug.c', - 'pb_bufmgr_fenced.c', - 'pb_bufmgr_mm.c', - 'pb_bufmgr_ondemand.c', - 'pb_bufmgr_pool.c', - 'pb_bufmgr_slab.c', - 'pb_validate.c', - ]) - -auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index a9375abd218..a4b78f14943 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -80,27 +80,11 @@ struct fenced_buffer_list */ struct fenced_buffer { - /* - * Immutable members. - */ - struct pb_buffer base; + struct pb_buffer *buffer; - struct fenced_buffer_list *list; - - /** - * Protected by fenced_buffer_list::mutex - */ - struct list_head head; - /** - * Following members are mutable and protected by this mutex. - * - * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex - * held, but in order to prevent deadlocks you must never lock - * fenced_buffer_list::mutex with this mutex held. - */ - pipe_mutex mutex; + /* FIXME: protect access with mutex */ /** * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current @@ -112,6 +96,9 @@ struct fenced_buffer struct pb_validate *vl; unsigned validation_flags; struct pipe_fence_handle *fence; + + struct list_head head; + struct fenced_buffer_list *list; }; @@ -123,24 +110,15 @@ fenced_buffer(struct pb_buffer *buf) } -/** - * Add the buffer to the fenced list. - * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order before calling this function. - * - * Reference count should be incremented before calling this function. - */ static INLINE void -fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +_fenced_buffer_add(struct fenced_buffer *fenced_buf) { + struct fenced_buffer_list *fenced_list = fenced_buf->list; + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); - /* TODO: Move the reference count increment here */ - #ifdef DEBUG LIST_DEL(&fenced_buf->head); assert(fenced_list->numUnfenced); @@ -152,16 +130,32 @@ fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, /** - * Remove the buffer from the fenced list. - * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order before calling this function. - * - * Reference count should be decremented after calling this function. + * Actually destroy the buffer. */ static INLINE void -fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +_fenced_buffer_destroy(struct fenced_buffer *fenced_buf) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + + assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(!fenced_buf->fence); +#ifdef DEBUG + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); + assert(fenced_list->numUnfenced); + --fenced_list->numUnfenced; +#else + (void)fenced_list; +#endif + pb_reference(&fenced_buf->buffer, NULL); + FREE(fenced_buf); +} + + +static INLINE void +_fenced_buffer_remove(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { struct pb_fence_ops *ops = fenced_list->ops; @@ -183,53 +177,37 @@ fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, ++fenced_list->numUnfenced; #endif - /* TODO: Move the reference count decrement and destruction here */ + /** + * FIXME!!! + */ + + if(!pipe_is_referenced(&fenced_buf->base.base.reference)) + _fenced_buffer_destroy(fenced_buf); } -/** - * Wait for the fence to expire, and remove it from the fenced list. - * - * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be - * held -- it will - */ static INLINE enum pipe_error -fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +_fenced_buffer_finish(struct fenced_buffer *fenced_buf) { + struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; - enum pipe_error ret = PIPE_ERROR; #if 0 debug_warning("waiting for GPU"); #endif - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); - - /* Acquire the global lock */ - pipe_mutex_unlock(fenced_buf->mutex); - pipe_mutex_lock(fenced_list->mutex); - pipe_mutex_lock(fenced_buf->mutex); - if(fenced_buf->fence) { - if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) { - /* Remove from the fenced list */ - /* TODO: remove consequents */ - fenced_buffer_remove_locked(fenced_list, fenced_buf); - - p_atomic_dec(&fenced_buf->base.base.reference.count); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - - ret = PIPE_OK; + if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) { + return PIPE_ERROR; } + /* Remove from the fenced list */ + /* TODO: remove consequents */ + _fenced_buffer_remove(fenced_list, fenced_buf); } - pipe_mutex_unlock(fenced_list->mutex); - - return ret; + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + return PIPE_OK; } @@ -237,7 +215,7 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, * Free as many fenced buffers from the list head as possible. */ static void -fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, +_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) { struct pb_fence_ops *ops = fenced_list->ops; @@ -251,30 +229,21 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, while(curr != &fenced_list->delayed) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); - if(fenced_buf->fence != prev_fence) { int signaled; if (wait) signaled = ops->fence_finish(ops, fenced_buf->fence, 0); else signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - if (signaled != 0) { - pipe_mutex_unlock(fenced_buf->mutex); + if (signaled != 0) break; - } prev_fence = fenced_buf->fence; } else { assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } - fenced_buffer_remove_locked(fenced_list, fenced_buf); - pipe_mutex_unlock(fenced_buf->mutex); - - pb_buf = &fenced_buf->base; - pb_reference(&pb_buf, NULL); - + _fenced_buffer_remove(fenced_list, fenced_buf); curr = next; next = curr->next; @@ -288,25 +257,30 @@ fenced_buffer_destroy(struct pb_buffer *buf) struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; - assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - assert(!fenced_buf->fence); - -#ifdef DEBUG pipe_mutex_lock(fenced_list->mutex); - assert(fenced_buf->head.prev); - assert(fenced_buf->head.next); - LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; + assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + if (fenced_buf->fence) { + struct pb_fence_ops *ops = fenced_list->ops; + if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { + struct list_head *curr, *prev; + curr = &fenced_buf->head; + prev = curr->prev; + do { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); + _fenced_buffer_remove(fenced_list, fenced_buf); + curr = prev; + prev = curr->prev; + } while (curr != &fenced_list->delayed); + } + else { + /* delay destruction */ + } + } + else { + _fenced_buffer_destroy(fenced_buf); + } pipe_mutex_unlock(fenced_list->mutex); -#else - (void)fenced_list; -#endif - - pb_reference(&fenced_buf->buffer, NULL); - - pipe_mutex_destroy(fenced_buf->mutex); - FREE(fenced_buf); } @@ -317,23 +291,24 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; - void *map = NULL; - - pipe_mutex_lock(fenced_buf->mutex); + void *map; assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); /* Serialize writes */ if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { - if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && - ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { + if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) { /* Don't wait for the GPU to finish writing */ - goto finish; + if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) + _fenced_buffer_remove(fenced_list, fenced_buf); + else + return NULL; + } + else { + /* Wait for the GPU to finish writing */ + _fenced_buffer_finish(fenced_buf); } - - /* Wait for the GPU to finish writing */ - fenced_buffer_finish_locked(fenced_list, fenced_buf); } #if 0 @@ -350,9 +325,6 @@ fenced_buffer_map(struct pb_buffer *buf, fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; } -finish: - pipe_mutex_unlock(fenced_buf->mutex); - return map; } @@ -361,9 +333,6 @@ static void fenced_buffer_unmap(struct pb_buffer *buf) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - - pipe_mutex_lock(fenced_buf->mutex); - assert(fenced_buf->mapcount); if(fenced_buf->mapcount) { pb_unmap(fenced_buf->buffer); @@ -371,8 +340,6 @@ fenced_buffer_unmap(struct pb_buffer *buf) if(!fenced_buf->mapcount) fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } - - pipe_mutex_unlock(fenced_buf->mutex); } @@ -384,14 +351,11 @@ fenced_buffer_validate(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); enum pipe_error ret; - pipe_mutex_lock(fenced_buf->mutex); - if(!vl) { /* invalidate */ fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; - ret = PIPE_OK; - goto finish; + return PIPE_OK; } assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); @@ -399,17 +363,14 @@ fenced_buffer_validate(struct pb_buffer *buf, flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; /* Buffer cannot be validated in two different lists */ - if(fenced_buf->vl && fenced_buf->vl != vl) { - ret = PIPE_ERROR_RETRY; - goto finish; - } + if(fenced_buf->vl && fenced_buf->vl != vl) + return PIPE_ERROR_RETRY; #if 0 /* Do not validate if buffer is still mapped */ if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { /* TODO: wait for the thread that mapped the buffer to unmap it */ - ret = PIPE_ERROR_RETRY; - goto finish; + return PIPE_ERROR_RETRY; } /* Final sanity checking */ assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); @@ -419,21 +380,17 @@ fenced_buffer_validate(struct pb_buffer *buf, if(fenced_buf->vl == vl && (fenced_buf->validation_flags & flags) == flags) { /* Nothing to do -- buffer already validated */ - ret = PIPE_OK; - goto finish; + return PIPE_OK; } ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) - goto finish; + return ret; fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; -finish: - pipe_mutex_unlock(fenced_buf->mutex); - - return ret; + return PIPE_OK; } @@ -448,36 +405,29 @@ fenced_buffer_fence(struct pb_buffer *buf, fenced_buf = fenced_buffer(buf); fenced_list = fenced_buf->list; ops = fenced_list->ops; - - pipe_mutex_lock(fenced_list->mutex); - pipe_mutex_lock(fenced_buf->mutex); - - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - - if(fence != fenced_buf->fence) { - assert(fenced_buf->vl); - assert(fenced_buf->validation_flags); - - if (fenced_buf->fence) { - fenced_buffer_remove_locked(fenced_list, fenced_buf); - p_atomic_dec(&fenced_buf->base.base.reference.count); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - } - if (fence) { - ops->fence_reference(ops, &fenced_buf->fence, fence); - fenced_buf->flags |= fenced_buf->validation_flags; - p_atomic_inc(&fenced_buf->base.base.reference.count); - fenced_buffer_add_locked(fenced_list, fenced_buf); - } - - pb_fence(fenced_buf->buffer, fence); - fenced_buf->vl = NULL; - fenced_buf->validation_flags = 0; + if(fence == fenced_buf->fence) { + /* Nothing to do */ + return; } - pipe_mutex_unlock(fenced_buf->mutex); + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + pipe_mutex_lock(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_list, fenced_buf); + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + _fenced_buffer_add(fenced_buf); + } pipe_mutex_unlock(fenced_list->mutex); + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; } @@ -487,7 +437,6 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, pb_size *offset) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - /* NOTE: accesses immutable members only -- mutex not necessary */ pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); } @@ -527,8 +476,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, buf->buffer = buffer; buf->list = fenced_list; - pipe_mutex_init(buf->mutex); - #ifdef DEBUG pipe_mutex_lock(fenced_list->mutex); LIST_ADDTAIL(&buf->head, &fenced_list->unfenced); @@ -570,7 +517,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) { pipe_mutex_lock(fenced_list->mutex); - fenced_buffer_list_check_free_locked(fenced_list, wait); + _fenced_buffer_list_check_free(fenced_list, wait); pipe_mutex_unlock(fenced_list->mutex); } @@ -592,13 +539,11 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) next = curr->next; while(curr != &fenced_list->unfenced) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); assert(!fenced_buf->fence); debug_printf("%10p %7u %7u\n", (void *) fenced_buf, fenced_buf->base.base.size, p_atomic_read(&fenced_buf->base.base.reference.count)); - pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -608,7 +553,6 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) while(curr != &fenced_list->delayed) { int signaled; fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); debug_printf("%10p %7u %7u %10p %s\n", (void *) fenced_buf, @@ -616,7 +560,6 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) p_atomic_read(&fenced_buf->base.base.reference.count), (void *) fenced_buf->fence, signaled == 0 ? "y" : "n"); - pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -637,8 +580,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) sched_yield(); #endif + _fenced_buffer_list_check_free(fenced_list, 1); pipe_mutex_lock(fenced_list->mutex); - fenced_buffer_list_check_free_locked(fenced_list, 1); } #ifdef DEBUG @@ -646,7 +589,6 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) #endif pipe_mutex_unlock(fenced_list->mutex); - pipe_mutex_destroy(fenced_list->mutex); fenced_list->ops->destroy(fenced_list->ops); diff --git a/src/gallium/auxiliary/rbug/Makefile b/src/gallium/auxiliary/rbug/Makefile deleted file mode 100644 index cd12e8468fc..00000000000 --- a/src/gallium/auxiliary/rbug/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = rbug - -C_SOURCES = \ - rbug_connection.c \ - rbug_core.c \ - rbug_texture.c \ - rbug_context.c \ - rbug_shader.c \ - rbug_demarshal.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/rbug/SConscript b/src/gallium/auxiliary/rbug/SConscript deleted file mode 100644 index 4a9afb45d3c..00000000000 --- a/src/gallium/auxiliary/rbug/SConscript +++ /dev/null @@ -1,14 +0,0 @@ -Import('*') - -rbug = env.ConvenienceLibrary( - target = 'rbug', - source = [ - 'rbug_core.c', - 'rbug_shader.c', - 'rbug_context.c', - 'rbug_texture.c', - 'rbug_demarshal.c', - 'rbug_connection.c', - ]) - -auxiliaries.insert(0, rbug) diff --git a/src/gallium/auxiliary/rbug/rbug_context.h b/src/gallium/auxiliary/rbug/rbug_context.h index da61c2365b0..03126d6b123 100644 --- a/src/gallium/auxiliary/rbug/rbug_context.h +++ b/src/gallium/auxiliary/rbug/rbug_context.h @@ -46,7 +46,7 @@ typedef enum RBUG_BLOCK_BEFORE = 1, RBUG_BLOCK_AFTER = 2, RBUG_BLOCK_RULE = 4, - RBUG_BLOCK_MASK = 7, + RBUG_BLOCK_MASK = 7 } rbug_block_t; struct rbug_proto_context_list diff --git a/src/gallium/auxiliary/rbug/rbug_proto.h b/src/gallium/auxiliary/rbug/rbug_proto.h index d273be0166d..4f3eb75dc4d 100644 --- a/src/gallium/auxiliary/rbug/rbug_proto.h +++ b/src/gallium/auxiliary/rbug/rbug_proto.h @@ -65,7 +65,7 @@ enum rbug_opcode RBUG_OP_SHADER_DISABLE = 770, RBUG_OP_SHADER_REPLACE = 771, RBUG_OP_SHADER_LIST_REPLY = -768, - RBUG_OP_SHADER_INFO_REPLY = -769, + RBUG_OP_SHADER_INFO_REPLY = -769 }; /** diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile deleted file mode 100644 index ab8ea464c6e..00000000000 --- a/src/gallium/auxiliary/rtasm/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = rtasm - -C_SOURCES = \ - rtasm_cpu.c \ - rtasm_execmem.c \ - rtasm_x86sse.c \ - rtasm_ppc.c \ - rtasm_ppc_spe.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript deleted file mode 100644 index eb48368accb..00000000000 --- a/src/gallium/auxiliary/rtasm/SConscript +++ /dev/null @@ -1,13 +0,0 @@ -Import('*') - -rtasm = env.ConvenienceLibrary( - target = 'rtasm', - source = [ - 'rtasm_cpu.c', - 'rtasm_execmem.c', - 'rtasm_x86sse.c', - 'rtasm_ppc.c', - 'rtasm_ppc_spe.c', - ]) - -auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 01811d50114..ffed768f979 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -41,6 +41,12 @@ #define MAP_ANONYMOUS MAP_ANON #endif +#if defined(PIPE_OS_WINDOWS) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include <windows.h> +#endif #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) @@ -118,7 +124,29 @@ rtasm_exec_free(void *addr) } -#else /* PIPE_OS_LINUX || PIPE_OS_BSD || PIPE_OS_SOLARIS */ +#elif defined(PIPE_OS_WINDOWS) + + +/* + * Avoid Data Execution Prevention. + */ + +void * +rtasm_exec_malloc(size_t size) +{ + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); +} + + +void +rtasm_exec_free(void *addr) +{ + VirtualFree(addr, 0, MEM_RELEASE); +} + + +#else + /* * Just use regular memory. @@ -138,4 +166,4 @@ rtasm_exec_free(void *addr) } -#endif /* PIPE_OS_LINUX || PIPE_OS_BSD || PIPE_OS_SOLARIS */ +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 1acf3c373eb..f675427d987 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -673,6 +673,13 @@ void x86_and( struct x86_function *p, emit_op_modrm( p, 0x23, 0x21, dst, src ); } +void x86_div( struct x86_function *p, + struct x86_reg src ) +{ + assert(src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); +} + /*********************************************************************** diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 731a6517968..f7612d416a0 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -244,6 +244,7 @@ void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_sahf( struct x86_function *p ); +void x86_div( struct x86_function *p, struct x86_reg src ); void x86_cdecl_caller_push_regs( struct x86_function *p ); diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile deleted file mode 100644 index a7d111b6891..00000000000 --- a/src/gallium/auxiliary/sct/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = sct - -C_SOURCES = \ - sct.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/sct/SConscript b/src/gallium/auxiliary/sct/SConscript deleted file mode 100644 index 76927d973f8..00000000000 --- a/src/gallium/auxiliary/sct/SConscript +++ /dev/null @@ -1,9 +0,0 @@ -Import('*') - -sct = env.ConvenienceLibrary( - target = 'sct', - source = [ - 'sct.c' - ]) - -auxiliaries.insert(0, sct) diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c deleted file mode 100644 index 722d2b7e66e..00000000000 --- a/src/gallium/auxiliary/sct/sct.c +++ /dev/null @@ -1,453 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "pipe/p_state.h" -#include "sct.h" - - -struct texture_list -{ - struct pipe_texture *texture; - struct texture_list *next; -}; - - - -#define MAX_SURFACES ((PIPE_MAX_COLOR_BUFS) + 1) - -struct sct_context -{ - const struct pipe_context *context; - - /** surfaces the context is drawing into */ - struct pipe_surface *surfaces[MAX_SURFACES]; - - /** currently bound textures */ - struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; - - /** previously bound textures, used but not flushed */ - struct texture_list *textures_used; - - boolean needs_flush; - - struct sct_context *next; -}; - - - -struct sct_surface -{ - const struct pipe_surface *surface; - - /** list of contexts drawing to this surface */ - struct sct_context_list *contexts; - - struct sct_surface *next; -}; - - - -/** - * Find the surface_info for the given pipe_surface - */ -static struct sct_surface * -find_surface_info(struct surface_context_tracker *sct, - const struct pipe_surface *surface) -{ - struct sct_surface *si; - for (si = sct->surfaces; si; si = si->next) - if (si->surface == surface) - return si; - return NULL; -} - - -/** - * As above, but create new surface_info if surface is new. - */ -static struct sct_surface * -find_create_surface_info(struct surface_context_tracker *sct, - const struct pipe_surface *surface) -{ - struct sct_surface *si = find_surface_info(sct, surface); - if (si) - return si; - - /* alloc new */ - si = CALLOC_STRUCT(sct_surface); - if (si) { - si->surface = surface; - - /* insert at head */ - si->next = sct->surfaces; - sct->surfaces = si; - } - - return si; -} - - -/** - * Find a context_info for the given context. - */ -static struct sct_context * -find_context_info(struct surface_context_tracker *sct, - const struct pipe_context *context) -{ - struct sct_context *ci; - for (ci = sct->contexts; ci; ci = ci->next) - if (ci->context == context) - return ci; - return NULL; -} - - -/** - * As above, but create new context_info if context is new. - */ -static struct sct_context * -find_create_context_info(struct surface_context_tracker *sct, - const struct pipe_context *context) -{ - struct sct_context *ci = find_context_info(sct, context); - if (ci) - return ci; - - /* alloc new */ - ci = CALLOC_STRUCT(sct_context); - if (ci) { - ci->context = context; - - /* insert at head */ - ci->next = sct->contexts; - sct->contexts = ci; - } - - return ci; -} - - -/** - * Is the context already bound to the surface? - */ -static boolean -find_surface_context(const struct sct_surface *si, - const struct pipe_context *context) -{ - const struct sct_context_list *cl; - for (cl = si->contexts; cl; cl = cl->next) { - if (cl->context == context) { - return TRUE; - } - } - return FALSE; -} - - -/** - * Add a context to the list of contexts associated with a surface. - */ -static void -add_context_to_surface(struct sct_surface *si, - const struct pipe_context *context) -{ - struct sct_context_list *cl = CALLOC_STRUCT(sct_context_list); - if (cl) { - cl->context = context; - /* insert at head of list of contexts */ - cl->next = si->contexts; - si->contexts = cl; - } -} - - -/** - * Remove a context from the list of contexts associated with a surface. - */ -static void -remove_context_from_surface(struct sct_surface *si, - const struct pipe_context *context) -{ - struct sct_context_list *prev = NULL, *curr, *next; - - for (curr = si->contexts; curr; curr = next) { - if (curr->context == context) { - /* remove */ - if (prev) - prev->next = curr->next; - else - si->contexts = curr->next; - next = curr->next; - FREE(curr); - } - else { - prev = curr; - next = curr->next; - } - } -} - - -/** - * Unbind context from surface. - */ -static void -unbind_context_surface(struct surface_context_tracker *sct, - struct pipe_context *context, - struct pipe_surface *surface) -{ - struct sct_surface *si = find_surface_info(sct, surface); - if (si) { - remove_context_from_surface(si, context); - } -} - - -/** - * Bind context to a set of surfaces (color + Z). - * Like MakeCurrent(). - */ -void -sct_bind_surfaces(struct surface_context_tracker *sct, - struct pipe_context *context, - uint num_surf, - struct pipe_surface **surfaces) -{ - struct sct_context *ci = find_create_context_info(sct, context); - uint i; - - if (!ci) { - return; /* out of memory */ - } - - /* unbind currently bound surfaces */ - for (i = 0; i < MAX_SURFACES; i++) { - if (ci->surfaces[i]) { - unbind_context_surface(sct, context, ci->surfaces[i]); - } - } - - /* bind new surfaces */ - for (i = 0; i < num_surf; i++) { - struct sct_surface *si = find_create_surface_info(sct, surfaces[i]); - if (!find_surface_context(si, context)) { - add_context_to_surface(si, context); - } - } -} - - -/** - * Return list of contexts bound to a surface. - */ -const struct sct_context_list * -sct_get_surface_contexts(struct surface_context_tracker *sct, - const struct pipe_surface *surface) -{ - const struct sct_surface *si = find_surface_info(sct, surface); - return si->contexts; -} - - - -static boolean -find_texture(const struct sct_context *ci, - const struct pipe_texture *texture) -{ - const struct texture_list *tl; - - for (tl = ci->textures_used; tl; tl = tl->next) { - if (tl->texture == texture) { - return TRUE; - } - } - return FALSE; -} - - -/** - * Add the given texture to the context's list of used textures. - */ -static void -add_texture_used(struct sct_context *ci, - struct pipe_texture *texture) -{ - if (!find_texture(ci, texture)) { - /* add to list */ - struct texture_list *tl = CALLOC_STRUCT(texture_list); - if (tl) { - pipe_texture_reference(&tl->texture, texture); - /* insert at head */ - tl->next = ci->textures_used; - ci->textures_used = tl; - } - } -} - - -/** - * Bind a texture to a rendering context. - */ -void -sct_bind_texture(struct surface_context_tracker *sct, - struct pipe_context *context, - uint unit, - struct pipe_texture *tex) -{ - struct sct_context *ci = find_context_info(sct, context); - - if (ci->textures[unit] != tex) { - /* put texture on the 'used' list */ - add_texture_used(ci, tex); - /* bind new */ - pipe_texture_reference(&ci->textures[unit], tex); - } -} - - -/** - * Check if the given texture has been used by the rendering context - * since the last call to sct_flush_textures(). - */ -boolean -sct_is_texture_used(struct surface_context_tracker *sct, - const struct pipe_context *context, - const struct pipe_texture *texture) -{ - const struct sct_context *ci = find_context_info(sct, context); - return find_texture(ci, texture); -} - - -/** - * To be called when the image contents of a texture are changed, such - * as for gl[Copy]TexSubImage(). - * XXX this may not be needed - */ -void -sct_update_texture(struct pipe_texture *tex) -{ - -} - - -/** - * When a scene is flushed/rendered we can release the list of - * used textures. - */ -void -sct_flush_textures(struct surface_context_tracker *sct, - struct pipe_context *context) -{ - struct sct_context *ci = find_context_info(sct, context); - struct texture_list *tl, *next; - uint i; - - for (tl = ci->textures_used; tl; tl = next) { - next = tl->next; - pipe_texture_reference(&tl->texture, NULL); - FREE(tl); - } - ci->textures_used = NULL; - - /* put the currently bound textures on the 'used' list */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - add_texture_used(ci, ci->textures[i]); - } -} - - - -void -sct_destroy_context(struct surface_context_tracker *sct, - struct pipe_context *context) -{ - /* XXX should we require an unbinding first? */ - { - struct sct_surface *si; - for (si = sct->surfaces; si; si = si->next) { - remove_context_from_surface(si, context); - } - } - - /* remove context from context_info list */ - { - struct sct_context *ci, *next, *prev = NULL; - for (ci = sct->contexts; ci; ci = next) { - next = ci->next; - if (ci->context == context) { - if (prev) - prev->next = ci->next; - else - sct->contexts = ci->next; - FREE(ci); - } - else { - prev = ci; - } - } - } - -} - - -void -sct_destroy_surface(struct surface_context_tracker *sct, - struct pipe_surface *surface) -{ - if (1) { - /* debug/sanity: no context should be bound to surface */ - struct sct_context *ci; - uint i; - for (ci = sct->contexts; ci; ci = ci->next) { - for (i = 0; i < MAX_SURFACES; i++) { - assert(ci->surfaces[i] != surface); - } - } - } - - /* remove surface from sct_surface list */ - { - struct sct_surface *si, *next, *prev = NULL; - for (si = sct->surfaces; si; si = next) { - next = si->next; - if (si->surface == surface) { - /* unlink */ - if (prev) - prev->next = si->next; - else - sct->surfaces = si->next; - FREE(si); - } - else { - prev = si; - } - } - } -} diff --git a/src/gallium/auxiliary/sct/sct.h b/src/gallium/auxiliary/sct/sct.h deleted file mode 100644 index cf7c4d3bdfd..00000000000 --- a/src/gallium/auxiliary/sct/sct.h +++ /dev/null @@ -1,123 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Surface/Context Tracking - * - * For some drivers, we need to monitor the binding between contexts and - * surfaces/textures. - * This code may evolve quite a bit... - */ - - -#ifndef SCT_H -#define SCT_H - - -#ifdef __cplusplus -extern "C" { -#endif - - -struct pipe_context; -struct pipe_surface; - -struct sct_context; -struct sct_surface; - - -/** - * Per-device info, basically - */ -struct surface_context_tracker -{ - struct sct_context *contexts; - struct sct_surface *surfaces; -}; - - - -/** - * Simple linked list of contexts - */ -struct sct_context_list -{ - const struct pipe_context *context; - struct sct_context_list *next; -}; - - - -extern void -sct_bind_surfaces(struct surface_context_tracker *sct, - struct pipe_context *context, - uint num_surf, - struct pipe_surface **surfaces); - - -extern void -sct_bind_texture(struct surface_context_tracker *sct, - struct pipe_context *context, - uint unit, - struct pipe_texture *texture); - - -extern void -sct_update_texture(struct pipe_texture *tex); - - -extern boolean -sct_is_texture_used(struct surface_context_tracker *sct, - const struct pipe_context *context, - const struct pipe_texture *texture); - -extern void -sct_flush_textures(struct surface_context_tracker *sct, - struct pipe_context *context); - - -extern const struct sct_context_list * -sct_get_surface_contexts(struct surface_context_tracker *sct, - const struct pipe_surface *surf); - - -extern void -sct_destroy_context(struct surface_context_tracker *sct, - struct pipe_context *context); - - -extern void -sct_destroy_surface(struct surface_context_tracker *sct, - struct pipe_surface *surface); - - - -#ifdef __cplusplus -} -#endif - -#endif /* SCT_H */ diff --git a/src/gallium/auxiliary/sct/usage.c b/src/gallium/auxiliary/sct/usage.c deleted file mode 100644 index 6227f199628..00000000000 --- a/src/gallium/auxiliary/sct/usage.c +++ /dev/null @@ -1,61 +0,0 @@ -/* surface / context tracking */ - - -/* - -context A: - render to texture T - -context B: - texture from T - ------------------------ - -flush surface: - which contexts are bound to the surface? - ------------------------ - -glTexSubImage(): - which contexts need to be flushed? - - */ - - -/* - -in MakeCurrent(): - - call sct_bind_surfaces(context, list of surfaces) to update the - dependencies between context and surfaces - - -in SurfaceFlush(), or whatever it is in D3D: - - call sct_get_surface_contexts(surface) to get a list of contexts - which are currently bound to the surface. - - - -in BindTexture(): - - call sct_bind_texture(context, texture) to indicate that the texture - is used in the scene. - - -in glTexSubImage() or RenderToTexture(): - - call sct_is_texture_used(context, texture) to determine if the texture - has been used in the scene, but the scene's not flushed. If TRUE is - returned it means the scene has to be rendered/flushed before the contents - of the texture can be changed. - - -in psb_scene_flush/terminate(): - - call sct_flush_textures(context) to tell the SCT that the textures which - were used in the scene can be released. - - - -*/ diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile deleted file mode 100644 index 5f0a580b096..00000000000 --- a/src/gallium/auxiliary/tgsi/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = tgsi - -C_SOURCES = \ - tgsi_sanity.c \ - tgsi_build.c \ - tgsi_dump.c \ - tgsi_exec.c \ - tgsi_info.c \ - tgsi_iterate.c \ - tgsi_parse.c \ - tgsi_ppc.c \ - tgsi_scan.c \ - tgsi_sse2.c \ - tgsi_text.c \ - tgsi_transform.c \ - tgsi_ureg.c \ - tgsi_util.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript deleted file mode 100644 index b6bc2924f06..00000000000 --- a/src/gallium/auxiliary/tgsi/SConscript +++ /dev/null @@ -1,23 +0,0 @@ -Import('*') - -tgsi = env.ConvenienceLibrary( - target = 'tgsi', - source = [ - 'tgsi_build.c', - 'tgsi_dump.c', - 'tgsi_dump_c.c', - 'tgsi_exec.c', - 'tgsi_info.c', - 'tgsi_iterate.c', - 'tgsi_parse.c', - 'tgsi_sanity.c', - 'tgsi_scan.c', - 'tgsi_ppc.c', - 'tgsi_sse2.c', - 'tgsi_text.c', - 'tgsi_transform.c', - 'tgsi_ureg.c', - 'tgsi_util.c', - ]) - -auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 92903fe57f3..de9cbc86305 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -1025,7 +1025,7 @@ tgsi_build_full_property( size++; *property = tgsi_build_property( - TGSI_PROPERTY_GS_INPUT_PRIM, + full_prop->Property.PropertyName, header ); assert( full_prop->Property.NrTokens <= 8 + 1 ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 5e7e5d2ff9c..d7ff262f30a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -122,12 +122,16 @@ static const char *semantic_names[] = "GENERIC", "NORMAL", "FACE", - "EDGEFLAG" + "EDGEFLAG", + "PRIM_ID", + "INSTANCEID" }; static const char *immediate_type_names[] = { - "FLT32" + "FLT32", + "UINT32", + "INT32" }; static const char *swizzle_names[] = @@ -174,13 +178,19 @@ static const char *primitive_names[] = static void -_dump_register( +_dump_register_decl( struct dump_ctx *ctx, uint file, int first, int last ) { ENM( file, file_names ); + + /* all geometry shader inputs are two dimensional */ + if (file == TGSI_FILE_INPUT && + ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) + TXT("[]"); + CHR( '[' ); SID( first ); if (first != last) { @@ -191,6 +201,52 @@ _dump_register( } static void +_dump_register_dst( + struct dump_ctx *ctx, + uint file, + int index) +{ + ENM( file, file_names ); + + CHR( '[' ); + SID( index ); + CHR( ']' ); +} + + +static void +_dump_register_src( + struct dump_ctx *ctx, + const struct tgsi_full_src_register *src ) +{ + if (src->Register.Indirect) { + ENM( src->Register.File, file_names ); + CHR( '[' ); + ENM( src->Indirect.File, file_names ); + CHR( '[' ); + SID( src->Indirect.Index ); + TXT( "]." ); + ENM( src->Indirect.SwizzleX, swizzle_names ); + if (src->Register.Index != 0) { + if (src->Register.Index > 0) + CHR( '+' ); + SID( src->Register.Index ); + } + CHR( ']' ); + } else { + ENM( src->Register.File, file_names ); + CHR( '[' ); + SID( src->Register.Index ); + CHR( ']' ); + } + if (src->Register.Dimension) { + CHR( '[' ); + SID( src->Dimension.Index ); + CHR( ']' ); + } +} + +static void _dump_register_ind( struct dump_ctx *ctx, uint file, @@ -244,7 +300,7 @@ iter_declaration( TXT( "DCL " ); - _dump_register( + _dump_register_decl( ctx, decl->Declaration.File, decl->Range.First, @@ -359,6 +415,12 @@ iter_immediate( case TGSI_IMM_FLOAT32: FLT( imm->u[i].Float ); break; + case TGSI_IMM_UINT32: + UID(imm->u[i].Uint); + break; + case TGSI_IMM_INT32: + SID(imm->u[i].Int); + break; default: assert( 0 ); } @@ -435,10 +497,9 @@ iter_instruction( dst->Indirect.SwizzleX ); } else { - _dump_register( + _dump_register_dst( ctx, dst->Register.File, - dst->Register.Index, dst->Register.Index ); } _dump_writemask( ctx, dst->Register.WriteMask ); @@ -454,26 +515,11 @@ iter_instruction( CHR( ' ' ); if (src->Register.Negate) - TXT( "-(" ); + CHR( '-' ); if (src->Register.Absolute) CHR( '|' ); - if (src->Register.Indirect) { - _dump_register_ind( - ctx, - src->Register.File, - src->Register.Index, - src->Indirect.File, - src->Indirect.Index, - src->Indirect.SwizzleX ); - } - else { - _dump_register( - ctx, - src->Register.File, - src->Register.Index, - src->Register.Index ); - } + _dump_register_src(ctx, src); if (src->Register.SwizzleX != TGSI_SWIZZLE_X || src->Register.SwizzleY != TGSI_SWIZZLE_Y || @@ -488,8 +534,6 @@ iter_instruction( if (src->Register.Absolute) CHR( '|' ); - if (src->Register.Negate) - CHR( ')' ); first_reg = FALSE; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 123117cb0a3..118a638ab48 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -60,6 +61,7 @@ #include "util/u_memory.h" #include "util/u_math.h" + #define FAST_MATH 1 #define TILE_TOP_LEFT 0 @@ -67,11 +69,329 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +static void +micro_abs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = fabsf(src->f[0]); + dst->f[1] = fabsf(src->f[1]); + dst->f[2] = fabsf(src->f[2]); + dst->f[3] = fabsf(src->f[3]); +} + +static void +micro_arl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0]); + dst->i[1] = (int)floorf(src->f[1]); + dst->i[2] = (int)floorf(src->f[2]); + dst->i[3] = (int)floorf(src->f[3]); +} + +static void +micro_arr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0] + 0.5f); + dst->i[1] = (int)floorf(src->f[1] + 0.5f); + dst->i[2] = (int)floorf(src->f[2] + 0.5f); + dst->i[3] = (int)floorf(src->f[3] + 0.5f); +} + +static void +micro_ceil(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = ceilf(src->f[0]); + dst->f[1] = ceilf(src->f[1]); + dst->f[2] = ceilf(src->f[2]); + dst->f[3] = ceilf(src->f[3]); +} + +static void +micro_cos(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = cosf(src->f[0]); + dst->f[1] = cosf(src->f[1]); + dst->f[2] = cosf(src->f[2]); + dst->f[3] = cosf(src->f[3]); +} + +static void +micro_ddx(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; +} + +static void +micro_exp2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_exp2(src->f[0]); + dst->f[1] = util_fast_exp2(src->f[1]); + dst->f[2] = util_fast_exp2(src->f[2]); + dst->f[3] = util_fast_exp2(src->f[3]); +#else +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. */ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif /* DEBUG */ + + dst->f[0] = powf(2.0f, src->f[0]); + dst->f[1] = powf(2.0f, src->f[1]); + dst->f[2] = powf(2.0f, src->f[2]); + dst->f[3] = powf(2.0f, src->f[3]); +#endif /* FAST_MATH */ +} + +static void +micro_flr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0]); + dst->f[1] = floorf(src->f[1]); + dst->f[2] = floorf(src->f[2]); + dst->f[3] = floorf(src->f[3]); +} + +static void +micro_frc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] - floorf(src->f[0]); + dst->f[1] = src->f[1] - floorf(src->f[1]); + dst->f[2] = src->f[2] - floorf(src->f[2]); + dst->f[3] = src->f[3] - floorf(src->f[3]); +} + +static void +micro_iabs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; + dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; + dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; + dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; +} + +static void +micro_ineg(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_lg2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_log2(src->f[0]); + dst->f[1] = util_fast_log2(src->f[1]); + dst->f[2] = util_fast_log2(src->f[2]); + dst->f[3] = util_fast_log2(src->f[3]); +#else + dst->f[0] = logf(src->f[0]) * 1.442695f; + dst->f[1] = logf(src->f[1]) * 1.442695f; + dst->f[2] = logf(src->f[2]) * 1.442695f; + dst->f[3] = logf(src->f[3]) * 1.442695f; +#endif +} + +static void +micro_lrp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0]; + dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1]; + dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2]; + dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3]; +} + +static void +micro_mad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0]; + dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1]; + dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2]; + dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3]; +} + +static void +micro_mov(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src->u[0]; + dst->u[1] = src->u[1]; + dst->u[2] = src->u[2]; + dst->u[3] = src->u[3]; +} + +static void +micro_rcp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / src->f[0]; + dst->f[1] = 1.0f / src->f[1]; + dst->f[2] = 1.0f / src->f[2]; + dst->f[3] = 1.0f / src->f[3]; +} + +static void +micro_rnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0] + 0.5f); + dst->f[1] = floorf(src->f[1] + 0.5f); + dst->f[2] = floorf(src->f[2] + 0.5f); + dst->f[3] = floorf(src->f[3] + 0.5f); +} + +static void +micro_rsq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); + dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); + dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); + dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); +} + +static void +micro_seq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sgn(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; + dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; + dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; + dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; +} + +static void +micro_sgt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = sinf(src->f[0]); + dst->f[1] = sinf(src->f[1]); + dst->f[2] = sinf(src->f[2]); + dst->f[3] = sinf(src->f[3]); +} + +static void +micro_sle(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_slt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_trunc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)(int)src->f[0]; + dst->f[1] = (float)(int)src->f[1]; + dst->f[2] = (float)(int)src->f[2]; + dst->f[3] = (float)(int)src->f[3]; +} + + #define CHAN_X 0 #define CHAN_Y 1 #define CHAN_Z 2 #define CHAN_W 3 +enum tgsi_exec_datatype { + TGSI_EXEC_DATA_FLOAT, + TGSI_EXEC_DATA_INT, + TGSI_EXEC_DATA_UINT +}; + /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ @@ -123,23 +443,19 @@ /** The execution mask depends on the conditional mask and the loop mask */ #define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; -#ifdef DEBUG -static void -check_inf_or_nan(const union tgsi_exec_channel *chan) -{ - assert(!util_is_inf_or_nan(chan->f[0])); - assert(!util_is_inf_or_nan(chan->f[1])); - assert(!util_is_inf_or_nan(chan->f[2])); - assert(!util_is_inf_or_nan(chan->f[3])); -} -#endif +#define CHECK_INF_OR_NAN(chan) do {\ + assert(!util_is_inf_or_nan((chan)->f[0]));\ + assert(!util_is_inf_or_nan((chan)->f[1]));\ + assert(!util_is_inf_or_nan((chan)->f[2]));\ + assert(!util_is_inf_or_nan((chan)->f[3]));\ + } while (0) #ifdef DEBUG @@ -292,6 +608,14 @@ tgsi_exec_machine_bind_shader( * sizeof(struct tgsi_full_declaration)); maxDeclarations += 10; } + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { + unsigned reg; + for (reg = parse.FullToken.FullDeclaration.Range.First; + reg <= parse.FullToken.FullDeclaration.Range.Last; + ++reg) { + ++mach->NumOutputs; + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -372,6 +696,7 @@ tgsi_exec_machine_create( void ) memset(mach, 0, sizeof(*mach)); mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; /* Setup constants. */ @@ -413,18 +738,6 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) align_free(mach); } - -static void -micro_abs( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = fabsf( src->f[0] ); - dst->f[1] = fabsf( src->f[1] ); - dst->f[2] = fabsf( src->f[2] ); - dst->f[3] = fabsf( src->f[3] ); -} - static void micro_add( union tgsi_exec_channel *dst, @@ -437,76 +750,6 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } -#if 0 -static void -micro_iadd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] + src1->i[0]; - dst->i[1] = src0->i[1] + src1->i[1]; - dst->i[2] = src0->i[2] + src1->i[2]; - dst->i[3] = src0->i[3] + src1->i[3]; -} -#endif - -static void -micro_and( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] & src1->u[0]; - dst->u[1] = src0->u[1] & src1->u[1]; - dst->u[2] = src0->u[2] & src1->u[2]; - dst->u[3] = src0->u[3] & src1->u[3]; -} - -static void -micro_ceil( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = ceilf( src->f[0] ); - dst->f[1] = ceilf( src->f[1] ); - dst->f[2] = ceilf( src->f[2] ); - dst->f[3] = ceilf( src->f[3] ); -} - -static void -micro_cos( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = cosf( src->f[0] ); - dst->f[1] = cosf( src->f[1] ); - dst->f[2] = cosf( src->f[2] ); - dst->f[3] = cosf( src->f[3] ); -} - -static void -micro_ddx( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_ddy( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; -} - static void micro_div( union tgsi_exec_channel *dst, @@ -527,99 +770,6 @@ micro_div( } } -#if 0 -static void -micro_udiv( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] / src1->u[0]; - dst->u[1] = src0->u[1] / src1->u[1]; - dst->u[2] = src0->u[2] / src1->u[2]; - dst->u[3] = src0->u[3] / src1->u[3]; -} -#endif - -static void -micro_eq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; -} - -#if 0 -static void -micro_ieq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -static void -micro_exp2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ -#if FAST_MATH - dst->f[0] = util_fast_exp2( src->f[0] ); - dst->f[1] = util_fast_exp2( src->f[1] ); - dst->f[2] = util_fast_exp2( src->f[2] ); - dst->f[3] = util_fast_exp2( src->f[3] ); -#else - -#if DEBUG - /* Inf is okay for this instruction, so clamp it to silence assertions. */ - uint i; - union tgsi_exec_channel clamped; - - for (i = 0; i < 4; i++) { - if (src->f[i] > 127.99999f) { - clamped.f[i] = 127.99999f; - } else if (src->f[i] < -126.99999f) { - clamped.f[i] = -126.99999f; - } else { - clamped.f[i] = src->f[i]; - } - } - src = &clamped; -#endif - - dst->f[0] = powf( 2.0f, src->f[0] ); - dst->f[1] = powf( 2.0f, src->f[1] ); - dst->f[2] = powf( 2.0f, src->f[2] ); - dst->f[3] = powf( 2.0f, src->f[3] ); -#endif -} - -#if 0 -static void -micro_f2ut( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = (uint) src->f[0]; - dst->u[1] = (uint) src->f[1]; - dst->u[2] = (uint) src->f[2]; - dst->u[3] = (uint) src->f[3]; -} -#endif - static void micro_float_clamp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -647,71 +797,6 @@ micro_float_clamp(union tgsi_exec_channel *dst, } static void -micro_flr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] ); - dst->f[1] = floorf( src->f[1] ); - dst->f[2] = floorf( src->f[2] ); - dst->f[3] = floorf( src->f[3] ); -} - -static void -micro_frc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] - floorf( src->f[0] ); - dst->f[1] = src->f[1] - floorf( src->f[1] ); - dst->f[2] = src->f[2] - floorf( src->f[2] ); - dst->f[3] = src->f[3] - floorf( src->f[3] ); -} - -static void -micro_i2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->i[0]; - dst->f[1] = (float) src->i[1]; - dst->f[2] = (float) src->i[2]; - dst->f[3] = (float) src->i[3]; -} - -static void -micro_lg2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ -#if FAST_MATH - dst->f[0] = util_fast_log2( src->f[0] ); - dst->f[1] = util_fast_log2( src->f[1] ); - dst->f[2] = util_fast_log2( src->f[2] ); - dst->f[3] = util_fast_log2( src->f[3] ); -#else - dst->f[0] = logf( src->f[0] ) * 1.442695f; - dst->f[1] = logf( src->f[1] ) * 1.442695f; - dst->f[2] = logf( src->f[2] ) * 1.442695f; - dst->f[3] = logf( src->f[3] ) * 1.442695f; -#endif -} - -static void -micro_le( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void micro_lt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, @@ -725,38 +810,6 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } -#if 0 -static void -micro_ilt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -#if 0 -static void -micro_ult( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; -} -#endif - static void micro_max( union tgsi_exec_channel *dst, @@ -769,34 +822,6 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - static void micro_min( union tgsi_exec_channel *dst, @@ -809,48 +834,6 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - -#if 0 -static void -micro_umod( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] % src1->u[0]; - dst->u[1] = src0->u[1] % src1->u[1]; - dst->u[2] = src0->u[2] % src1->u[2]; - dst->u[3] = src0->u[3] % src1->u[3]; -} -#endif - static void micro_mul( union tgsi_exec_channel *dst, @@ -865,20 +848,6 @@ micro_mul( #if 0 static void -micro_imul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] * src1->i[0]; - dst->i[1] = src0->i[1] * src1->i[1]; - dst->i[2] = src0->i[2] * src1->i[2]; - dst->i[3] = src0->i[3] * src1->i[3]; -} -#endif - -#if 0 -static void micro_imul64( union tgsi_exec_channel *dst0, union tgsi_exec_channel *dst1, @@ -942,42 +911,6 @@ micro_neg( dst->f[3] = -src->f[3]; } -#if 0 -static void -micro_ineg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} -#endif - -static void -micro_not( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = ~src->u[0]; - dst->u[1] = ~src->u[1]; - dst->u[2] = ~src->u[2]; - dst->u[3] = ~src->u[3]; -} - -static void -micro_or( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] | src1->u[0]; - dst->u[1] = src0->u[1] | src1->u[1]; - dst->u[2] = src0->u[2] | src1->u[2]; - dst->u[3] = src0->u[3] | src1->u[3]; -} - static void micro_pow( union tgsi_exec_channel *dst, @@ -998,88 +931,6 @@ micro_pow( } static void -micro_rnd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] + 0.5f ); - dst->f[1] = floorf( src->f[1] + 0.5f ); - dst->f[2] = floorf( src->f[2] + 0.5f ); - dst->f[3] = floorf( src->f[3] + 0.5f ); -} - -static void -micro_sgn( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; - dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; - dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; - dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; -} - -static void -micro_shl( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] << src1->i[0]; - dst->i[1] = src0->i[1] << src1->i[1]; - dst->i[2] = src0->i[2] << src1->i[2]; - dst->i[3] = src0->i[3] << src1->i[3]; -} - -static void -micro_ishr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] >> src1->i[0]; - dst->i[1] = src0->i[1] >> src1->i[1]; - dst->i[2] = src0->i[2] >> src1->i[2]; - dst->i[3] = src0->i[3] >> src1->i[3]; -} - -static void -micro_trunc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0 ) -{ - dst->f[0] = (float) (int) src0->f[0]; - dst->f[1] = (float) (int) src0->f[1]; - dst->f[2] = (float) (int) src0->f[2]; - dst->f[3] = (float) (int) src0->f[3]; -} - -#if 0 -static void -micro_ushr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] >> src1->u[0]; - dst->u[1] = src0->u[1] >> src1->u[1]; - dst->u[2] = src0->u[2] >> src1->u[2]; - dst->u[3] = src0->u[3] >> src1->u[3]; -} -#endif - -static void -micro_sin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sinf( src->f[0] ); - dst->f[1] = sinf( src->f[1] ); - dst->f[2] = sinf( src->f[2] ); - dst->f[3] = sinf( src->f[3] ); -} - -static void micro_sqrt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { @@ -1101,31 +952,6 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } -#if 0 -static void -micro_u2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->u[0]; - dst->f[1] = (float) src->u[1]; - dst->f[2] = (float) src->u[2]; - dst->f[3] = (float) src->u[3]; -} -#endif - -static void -micro_xor( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] ^ src1->u[0]; - dst->u[1] = src0->u[1] ^ src1->u[1]; - dst->u[2] = src0->u[2] ^ src1->u[2]; - dst->u[3] = src0->u[3] ^ src1->u[3]; -} - static void fetch_src_file_channel( const struct tgsi_exec_machine *mach, @@ -1215,20 +1041,28 @@ fetch_src_file_channel( default: assert( 0 ); + chan->u[0] = 0; + chan->u[1] = 0; + chan->u[2] = 0; + chan->u[3] = 0; } break; default: assert( 0 ); + chan->u[0] = 0; + chan->u[1] = 0; + chan->u[2] = 0; + chan->u[3] = 0; } } static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) +fetch_source(const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index, + enum tgsi_exec_datatype src_datatype) { union tgsi_exec_channel index; uint swizzle; @@ -1277,10 +1111,10 @@ fetch_source( &indir_index ); /* add value of address register to the offset */ - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1357,10 +1191,10 @@ fetch_source( &index2, &indir_index ); - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1385,32 +1219,30 @@ fetch_source( &index, chan ); - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); - break; - - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); - break; + if (reg->Register.Absolute) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_abs(chan, chan); + } else { + micro_iabs(chan, chan); + } + } - case TGSI_UTIL_SIGN_KEEP: - break; + if (reg->Register.Negate) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_neg(chan, chan); + } else { + micro_ineg(chan, chan); + } } } static void -store_dest( - struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) +store_dest(struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index, + enum tgsi_exec_datatype dst_datatype) { uint i; union tgsi_exec_channel null; @@ -1419,9 +1251,9 @@ store_dest( int offset = 0; /* indirection offset */ int index; -#ifdef DEBUG - check_inf_or_nan(chan); -#endif + if (dst_datatype == TGSI_EXEC_DATA_FLOAT) { + CHECK_INF_OR_NAN(chan); + } /* There is an extra source register that indirectly subscripts * a register file. The direct index now becomes an offset @@ -1456,7 +1288,7 @@ store_dest( &indir_index ); /* save indirection offset */ - offset = (int) indir_index.f[0]; + offset = indir_index.i[0]; } switch (reg->Register.File) { @@ -1468,6 +1300,15 @@ store_dest( index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + reg->Register.Index; dst = &mach->Outputs[offset + index].xyzw[chan_index]; +#if 0 + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) + fprintf(stderr, "%f, ", chan->f[i]); + fprintf(stderr, ")\n"); + } +#endif break; case TGSI_FILE_TEMPORARY: @@ -1577,10 +1418,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) + store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) /** @@ -1638,6 +1479,35 @@ exec_kilp(struct tgsi_exec_machine *mach, mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +static void +emit_vertex(struct tgsi_exec_machine *mach) +{ + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + } +} + +static void +emit_primitive(struct tgsi_exec_machine *mach) +{ + unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + ++(*prim_count); + debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); + mach->Primitives[*prim_count] = 0; + } +} /* * Fetch a four texture samples using STR texture coordinates. @@ -1647,7 +1517,8 @@ fetch_texel( struct tgsi_sampler *sampler, const union tgsi_exec_channel *s, const union tgsi_exec_channel *t, const union tgsi_exec_channel *p, - float lodbias, /* XXX should be float[4] */ + const union tgsi_exec_channel *c0, + enum tgsi_sampler_control control, union tgsi_exec_channel *r, union tgsi_exec_channel *g, union tgsi_exec_channel *b, @@ -1656,7 +1527,7 @@ fetch_texel( struct tgsi_sampler *sampler, uint j; float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); + sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba); for (j = 0; j < 4; j++) { r->f[j] = rgba[0][j]; @@ -1667,102 +1538,95 @@ fetch_texel( struct tgsi_sampler *sampler, } +#define TEX_MODIFIER_NONE 0 +#define TEX_MODIFIER_PROJECTED 1 +#define TEX_MODIFIER_LOD_BIAS 2 +#define TEX_MODIFIER_EXPLICIT_LOD 3 + + static void exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod, - boolean projected) + uint modifier) { const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; + const union tgsi_exec_channel *lod = &ZeroVec; + enum tgsi_sampler_control control; uint chan_index; - float lodBias; - /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ + if (modifier != TEX_MODIFIER_NONE) { + FETCH(&r[3], 0, CHAN_W); + if (modifier != TEX_MODIFIER_PROJECTED) { + lod = &r[3]; + } + } + + if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + control = tgsi_sampler_lod_explicit; + } else { + control = tgsi_sampler_lod_bias; + } switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: - FETCH(&r[0], 0, CHAN_X); - if (projected) { - FETCH(&r[1], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[1] ); - } - - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); } - else - lodBias = 0.0; fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: - FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); } - else - lodBias = 0.0; fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: - FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); } - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], lod, + control, &r[0], &r[1], &r[2], &r[3]); break; default: - assert (0); + assert(0); } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); } } @@ -1785,8 +1649,9 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: @@ -1799,8 +1664,9 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[2], 0, CHAN_Z); fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ + &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; case TGSI_TEXTURE_3D: @@ -1811,7 +1677,8 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[2], 0, CHAN_Z); fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, + &r[0], &r[1], &r[2], &ZeroVec, + tgsi_sampler_lod_bias, &r[0], &r[1], &r[2], &r[3]); break; @@ -1908,7 +1775,7 @@ exec_declaration(struct tgsi_exec_machine *mach, if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { assert(decl->Semantic.Index == 0); assert(first == last); - assert(mask = TGSI_WRITEMASK_XYZW); + assert(mask == TGSI_WRITEMASK_XYZW); mach->Inputs[first] = mach->QuadPos; } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { @@ -1954,6 +1821,585 @@ exec_declaration(struct tgsi_exec_machine *mach, } } +typedef void (* micro_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src); + +static void +exec_scalar_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + union tgsi_exec_channel src; + union tgsi_exec_channel dst; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); + op(&dst, &src); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src; + + fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); + op(&dst.xyzw[chan], &src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_binary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[2]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_trinary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[3]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_dp3(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { + fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp4(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + for (chan = CHAN_Y; chan <= CHAN_W; chan++) { + fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp2a(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], arg); + + fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dph(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], arg); + + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); + micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp2(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_break(struct tgsi_exec_machine *mach) +{ + if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + } else { + assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); + + mach->Switch.mask = 0x0; + + UPDATE_EXEC_MASK(mach); + } +} + +static void +exec_switch(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + mach->Switch.mask = 0x0; + mach->Switch.defaultMask = 0x0; + + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_case(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + union tgsi_exec_channel src; + uint mask = 0; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + + if (mach->Switch.selector.u[0] == src.u[0]) { + mask |= 0x1; + } + if (mach->Switch.selector.u[1] == src.u[1]) { + mask |= 0x2; + } + if (mach->Switch.selector.u[2] == src.u[2]) { + mask |= 0x4; + } + if (mach->Switch.selector.u[3] == src.u[3]) { + mask |= 0x8; + } + + mach->Switch.defaultMask |= mask; + + mach->Switch.mask |= mask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_default(struct tgsi_exec_machine *mach) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + + mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_endswitch(struct tgsi_exec_machine *mach) +{ + mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; + + UPDATE_EXEC_MASK(mach); +} + +static void +micro_i2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->i[0]; + dst->f[1] = (float)src->i[1]; + dst->f[2] = (float)src->i[2]; + dst->f[3] = (float)src->i[3]; +} + +static void +micro_not(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_shl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] << src[1].u[0]; + dst->u[1] = src[0].u[1] << src[1].u[1]; + dst->u[2] = src[0].u[2] << src[1].u[2]; + dst->u[3] = src[0].u[3] << src[1].u[3]; +} + +static void +micro_and(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] & src[1].u[0]; + dst->u[1] = src[0].u[1] & src[1].u[1]; + dst->u[2] = src[0].u[2] & src[1].u[2]; + dst->u[3] = src[0].u[3] & src[1].u[3]; +} + +static void +micro_or(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] | src[1].u[0]; + dst->u[1] = src[0].u[1] | src[1].u[1]; + dst->u[2] = src[0].u[2] | src[1].u[2]; + dst->u[3] = src[0].u[3] | src[1].u[3]; +} + +static void +micro_xor(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] ^ src[1].u[0]; + dst->u[1] = src[0].u[1] ^ src[1].u[1]; + dst->u[2] = src[0].u[2] ^ src[1].u[2]; + dst->u[3] = src[0].u[3] ^ src[1].u[3]; +} + +static void +micro_f2i(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)src->f[0]; + dst->i[1] = (int)src->f[1]; + dst->i[2] = (int)src->f[2]; + dst->i[3] = (int)src->f[3]; +} + +static void +micro_idiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] / src[1].i[0]; + dst->i[1] = src[0].i[1] / src[1].i[1]; + dst->i[2] = src[0].i[2] / src[1].i[2]; + dst->i[3] = src[0].i[3] / src[1].i[3]; +} + +static void +micro_imax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_imin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_isge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0; +} + +static void +micro_ishr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >> src[1].i[0]; + dst->i[1] = src[0].i[1] >> src[1].i[1]; + dst->i[2] = src[0].i[2] >> src[1].i[2]; + dst->i[3] = src[0].i[3] >> src[1].i[3]; +} + +static void +micro_islt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0; +} + +static void +micro_f2u(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = (uint)src->f[0]; + dst->u[1] = (uint)src->f[1]; + dst->u[2] = (uint)src->f[2]; + dst->u[3] = (uint)src->f[3]; +} + +static void +micro_u2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->u[0]; + dst->f[1] = (float)src->u[1]; + dst->f[2] = (float)src->u[2]; + dst->f[3] = (float)src->u[3]; +} + +static void +micro_uadd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] + src[1].u[0]; + dst->u[1] = src[0].u[1] + src[1].u[1]; + dst->u[2] = src[0].u[2] + src[1].u[2]; + dst->u[3] = src[0].u[3] + src[1].u[3]; +} + +static void +micro_udiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] / src[1].u[0]; + dst->u[1] = src[0].u[1] / src[1].u[1]; + dst->u[2] = src[0].u[2] / src[1].u[2]; + dst->u[3] = src[0].u[3] / src[1].u[3]; +} + +static void +micro_umad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3]; +} + +static void +micro_umax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] % src[1].u[0]; + dst->u[1] = src[0].u[1] % src[1].u[1]; + dst->u[2] = src[0].u[2] % src[1].u[2]; + dst->u[3] = src[0].u[3] % src[1].u[3]; +} + +static void +micro_umul(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3]; +} + +static void +micro_useq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0; +} + +static void +micro_usge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0; +} + +static void +micro_ushr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >> src[1].u[0]; + dst->u[1] = src[0].u[1] >> src[1].u[1]; + dst->u[2] = src[0].u[2] >> src[1].u[2]; + dst->u[3] = src[0].u[3] >> src[1].u[3]; +} + +static void +micro_uslt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0; +} + +static void +micro_usne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0; +} + static void exec_instruction( struct tgsi_exec_machine *mach, @@ -1968,23 +2414,11 @@ exec_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - case TGSI_OPCODE_FLR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MOV: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&d[chan_index], 0, chan_index); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LIT: @@ -2021,23 +2455,11 @@ exec_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[0], &r[0] ); - micro_sqrt( &r[0], &r[0] ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EXP: @@ -2104,54 +2526,11 @@ exec_instruction( break; case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp3(mach, inst); break; - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + case TGSI_OPCODE_DP4: + exec_dp4(mach, inst); break; case TGSI_OPCODE_DST: @@ -2208,41 +2587,15 @@ exec_instruction( break; case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1] ); - FETCH( &r[1], 2, chan_index ); - micro_add(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SUB: @@ -2257,17 +2610,7 @@ exec_instruction( break; case TGSI_OPCODE_LRP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); - micro_mul( &r[0], &r[0], &r[1] ); - micro_add(&d[chan_index], &r[0], &r[2]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CND: @@ -2283,31 +2626,11 @@ exec_instruction( break; case TGSI_OPCODE_DP2A: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[2], 2, CHAN_X ); - micro_add( &r[0], &r[0], &r[2] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp2a(mach, inst); break; case TGSI_OPCODE_FRC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_frc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CLAMP: @@ -2323,33 +2646,20 @@ exec_instruction( } break; + case TGSI_OPCODE_FLR: + exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_ARR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_rnd(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EX2: - FETCH(&r[0], 0, CHAN_X); - - micro_exp2( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LG2: - FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_POW: @@ -2402,15 +2712,9 @@ exec_instruction( } break; - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - micro_abs(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; + case TGSI_OPCODE_ABS: + exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; case TGSI_OPCODE_RCC: FETCH(&r[0], 0, CHAN_X); @@ -2422,60 +2726,19 @@ exec_instruction( break; case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 1, CHAN_W); - - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dph(mach, inst); break; case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - micro_cos( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddx(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddy(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_KILP: @@ -2552,14 +2815,7 @@ exec_instruction( break; case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SFL: @@ -2569,44 +2825,19 @@ exec_instruction( break; case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - micro_sin( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_STR: @@ -2619,14 +2850,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_NONE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS); break; case TGSI_OPCODE_TXD: @@ -2642,14 +2873,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); break; case TGSI_OPCODE_TXP: /* Texture lookup with projection */ /* src[0] = texcoord (src[0].w = projection) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, TRUE); + exec_tex(mach, inst, TEX_MODIFIER_PROJECTED); break; case TGSI_OPCODE_UP2H: @@ -2711,6 +2942,10 @@ exec_instruction( assert (0); break; + case TGSI_OPCODE_ARR: + exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_BRA: assert (0); break; @@ -2730,6 +2965,8 @@ exec_instruction( mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; + mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; /* note that PC was already incremented above */ mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; @@ -2737,12 +2974,17 @@ exec_instruction( /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ @@ -2775,6 +3017,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -2785,14 +3033,7 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_sgn(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CMP: @@ -2899,18 +3140,7 @@ exec_instruction( break; case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp2(mach, inst); break; case TGSI_OPCODE_IF: @@ -2976,87 +3206,31 @@ exec_instruction( break; case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ceil(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_i2f(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_not(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_trunc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_shl(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ishr(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_and(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_or(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_MOD: @@ -3064,14 +3238,7 @@ exec_instruction( break; case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_xor(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_SAD: @@ -3087,13 +3254,11 @@ exec_instruction( break; case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + emit_vertex(mach); break; case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + emit_primitive(mach); break; case TGSI_OPCODE_BGNFOR: @@ -3122,11 +3287,15 @@ exec_instruction( case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; case TGSI_OPCODE_ENDFOR: @@ -3173,6 +3342,8 @@ exec_instruction( --mach->LoopLabelStackTop; assert(mach->LoopCounterStackTop > 0); --mach->LoopCounterStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; @@ -3196,15 +3367,14 @@ exec_instruction( mach->ContMask = mach->ContStack[--mach->ContStackTop]; assert(mach->LoopLabelStackTop > 0); --mach->LoopLabelStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); + exec_break(mach); break; case TGSI_OPCODE_CONT: @@ -3235,6 +3405,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -3265,11 +3441,116 @@ exec_instruction( UPDATE_EXEC_MASK(mach); break; + case TGSI_OPCODE_F2I: + exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_IDIV: + exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMAX: + exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMIN: + exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_INEG: + exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISGE: + exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISHR: + exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISLT: + exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_F2U: + exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_U2F: + exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UADD: + exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UDIV: + exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAD: + exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAX: + exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMIN: + exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMOD: + exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMUL: + exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USEQ: + exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USGE: + exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USHR: + exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USLT: + exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USNE: + exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_SWITCH: + exec_switch(mach, inst); + break; + + case TGSI_OPCODE_CASE: + exec_case(mach, inst); + break; + + case TGSI_OPCODE_DEFAULT: + exec_default(mach); + break; + + case TGSI_OPCODE_ENDSWITCH: + exec_endswitch(mach); + break; + default: assert( 0 ); } } + #define DEBUG_EXECUTION 0 @@ -3289,9 +3570,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; + mach->Switch.mask = 0xf; + assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; @@ -3348,11 +3633,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("(%6f, %6f, %6f, %6f)\n", - temps[i].xyzw[0].f[j], - temps[i].xyzw[1].f[j], - temps[i].xyzw[2].f[j], - temps[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], + temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], + temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], + temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); } } } @@ -3366,11 +3651,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("{%6f, %6f, %6f, %6f}\n", - outputs[i].xyzw[0].f[j], - outputs[i].xyzw[1].f[j], - outputs[i].xyzw[2].f[j], - outputs[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } } } @@ -3392,6 +3677,8 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fd94c1bc440..59e3b445cc3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -35,11 +36,13 @@ extern "C" { #endif + #define MAX_LABELS (4 * 1024) /**< basically, max instructions */ #define NUM_CHANNELS 4 /* R,G,B,A */ #define QUAD_SIZE 4 /* 4 pixel/quad */ + /** * Registers may be treated as float, signed int or unsigned int. */ @@ -69,6 +72,11 @@ struct tgsi_interp_coef float dady[NUM_CHANNELS]; }; +enum tgsi_sampler_control { + tgsi_sampler_lod_bias, + tgsi_sampler_lod_explicit +}; + /** * Information for sampling textures, which must be implemented * by code outside the TGSI executor. @@ -80,7 +88,8 @@ struct tgsi_sampler const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); }; @@ -179,6 +188,7 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_COND_NESTING 32 #define TGSI_EXEC_MAX_LOOP_NESTING 32 +#define TGSI_EXEC_MAX_SWITCH_NESTING 32 #define TGSI_EXEC_MAX_CALL_NESTING 32 /* The maximum number of input attributes per vertex. For 2D @@ -191,6 +201,14 @@ struct tgsi_exec_labels */ #define TGSI_EXEC_MAX_CONST_BUFFER 4096 +/* The maximum number of vertices per primitive */ +#define TGSI_MAX_PRIM_VERTICES 6 + +/* The maximum number of primitives to be generated */ +#define TGSI_MAX_PRIMITIVES 64 + +/* The maximum total number of vertices */ +#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS) /** function call/activation record */ struct tgsi_call_record @@ -198,10 +216,29 @@ struct tgsi_call_record uint CondStackTop; uint LoopStackTop; uint ContStackTop; + int SwitchStackTop; + int BreakStackTop; uint ReturnAddr; }; +/* Switch-case block state. */ +struct tgsi_switch_record { + uint mask; /**< execution mask */ + union tgsi_exec_channel selector; /**< a value case statements are compared to */ + uint defaultMask; /**< non-execute mask for default case */ +}; + + +enum tgsi_break_type { + TGSI_EXEC_BREAK_INSIDE_LOOP, + TGSI_EXEC_BREAK_INSIDE_SWITCH +}; + + +#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING) + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -214,8 +251,8 @@ struct tgsi_exec_machine float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; - struct tgsi_exec_vector Inputs[PIPE_MAX_ATTRIBS]; - struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS]; + struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; + struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; struct tgsi_exec_vector *Addrs; struct tgsi_exec_vector *Predicates; @@ -229,6 +266,8 @@ struct tgsi_exec_machine /* GEOMETRY processor only. */ unsigned *Primitives; + unsigned NumOutputs; + unsigned MaxGeometryShaderOutputs; /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; @@ -242,6 +281,12 @@ struct tgsi_exec_machine uint FuncMask; /**< For function calls */ uint ExecMask; /**< = CondMask & LoopMask */ + /* Current switch-case state. */ + struct tgsi_switch_record Switch; + + /* Current break type. */ + enum tgsi_break_type BreakType; + /** Condition mask stack (for nested conditionals) */ uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; int CondStackTop; @@ -262,6 +307,13 @@ struct tgsi_exec_machine uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; + /** Switch case stack */ + struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING]; + int SwitchStackTop; + + enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK]; + int BreakStackTop; + /** Function execution mask stack (for executing subroutine code) */ uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; int FuncStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index be375cabb8b..de0e09cdbae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -119,7 +119,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 0, 0, 0, 0, 0, 0, "", 88 }, /* removed */ { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, @@ -149,7 +149,33 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */ + { 0, 0, 0, 0, 0, 0, "", 118 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "F2I", TGSI_OPCODE_F2I }, + { 1, 2, 0, 0, 0, 0, "IDIV", TGSI_OPCODE_IDIV }, + { 1, 2, 0, 0, 0, 0, "IMAX", TGSI_OPCODE_IMAX }, + { 1, 2, 0, 0, 0, 0, "IMIN", TGSI_OPCODE_IMIN }, + { 1, 1, 0, 0, 0, 0, "INEG", TGSI_OPCODE_INEG }, + { 1, 2, 0, 0, 0, 0, "ISGE", TGSI_OPCODE_ISGE }, + { 1, 2, 0, 0, 0, 0, "ISHR", TGSI_OPCODE_ISHR }, + { 1, 2, 0, 0, 0, 0, "ISLT", TGSI_OPCODE_ISLT }, + { 1, 1, 0, 0, 0, 0, "F2U", TGSI_OPCODE_F2U }, + { 1, 1, 0, 0, 0, 0, "U2F", TGSI_OPCODE_U2F }, + { 1, 2, 0, 0, 0, 0, "UADD", TGSI_OPCODE_UADD }, + { 1, 2, 0, 0, 0, 0, "UDIV", TGSI_OPCODE_UDIV }, + { 1, 3, 0, 0, 0, 0, "UMAD", TGSI_OPCODE_UMAD }, + { 1, 2, 0, 0, 0, 0, "UMAX", TGSI_OPCODE_UMAX }, + { 1, 2, 0, 0, 0, 0, "UMIN", TGSI_OPCODE_UMIN }, + { 1, 2, 0, 0, 0, 0, "UMOD", TGSI_OPCODE_UMOD }, + { 1, 2, 0, 0, 0, 0, "UMUL", TGSI_OPCODE_UMUL }, + { 1, 2, 0, 0, 0, 0, "USEQ", TGSI_OPCODE_USEQ }, + { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE }, + { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR }, + { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT }, + { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE }, + { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH }, + { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE }, + { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT }, + { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b34263da489..e4af15c156f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -124,7 +124,6 @@ OP11(I2F) OP11(NOT) OP11(TRUNC) OP12(SHL) -OP12(SHR) OP12(AND) OP12(OR) OP12(MOD) @@ -146,6 +145,28 @@ OP01(IFC) OP01(BREAKC) OP01(KIL) OP00(END) +OP11(F2I) +OP12(IDIV) +OP12(IMAX) +OP12(IMIN) +OP11(INEG) +OP12(ISGE) +OP12(ISHR) +OP12(ISLT) +OP11(F2U) +OP11(U2F) +OP12(UADD) +OP12(UDIV) +OP13(UMAD) +OP12(UMAX) +OP12(UMIN) +OP12(UMOD) +OP12(UMUL) +OP12(USEQ) +OP12(USGE) +OP12(USHR) +OP12(USLT) +OP12(USNE) #undef OP00 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index fa65ecb9975..8c7062d850c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -119,17 +119,29 @@ tgsi_parse_token( case TGSI_TOKEN_TYPE_IMMEDIATE: { struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; + uint imm_count; memset(imm, 0, sizeof *imm); copy_token(&imm->Immediate, &token); + imm_count = imm->Immediate.NrTokens - 1; + switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: - { - uint imm_count = imm->Immediate.NrTokens - 1; - for (i = 0; i < imm_count; i++) { - next_token(ctx, &imm->u[i]); - } + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Float); + } + break; + + case TGSI_IMM_UINT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Uint); + } + break; + + case TGSI_IMM_INT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Int); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 138d2d095bb..ad553c71a57 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -51,7 +51,8 @@ * Since it's pretty much impossible to form PPC vector immediates, load * them from memory here: */ -const float ppc_builtin_constants[] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) const float +ppc_builtin_constants[] = { 1.0f, -128.0f, 128.0, 0.0 }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index c27579e7942..e1e4f97967d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -26,32 +26,112 @@ **************************************************************************/ #include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_prim.h" +#include "cso_cache/cso_hash.h" #include "tgsi_sanity.h" #include "tgsi_info.h" #include "tgsi_iterate.h" -typedef uint reg_flag; - -#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) - -#define MAX_REGISTERS 1024 -#define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG) +typedef struct { + uint file : 28; + /* max 2 dimensions */ + uint dimensions : 4; + uint indices[2]; +} scan_register; struct sanity_check_ctx { struct tgsi_iterate_context iter; + struct cso_hash *regs_decl; + struct cso_hash *regs_used; + struct cso_hash *regs_ind_used; - reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REG_FLAGS]; - reg_flag regs_used[TGSI_FILE_COUNT][MAX_REG_FLAGS]; - boolean regs_ind_used[TGSI_FILE_COUNT]; uint num_imms; uint num_instructions; uint index_of_END; uint errors; uint warnings; + uint implied_array_size; }; +static INLINE unsigned +scan_register_key(const scan_register *reg) +{ + unsigned key = reg->file; + key |= (reg->indices[0] << 4); + key |= (reg->indices[1] << 18); + + return key; +} + +static void +fill_scan_register1d(scan_register *reg, + uint file, uint index) +{ + reg->file = file; + reg->dimensions = 1; + reg->indices[0] = index; + reg->indices[1] = 0; +} + +static void +fill_scan_register2d(scan_register *reg, + uint file, uint index1, uint index2) +{ + reg->file = file; + reg->dimensions = 2; + reg->indices[0] = index1; + reg->indices[1] = index2; +} + +static void +scan_register_dst(scan_register *reg, + struct tgsi_full_dst_register *dst) +{ + fill_scan_register1d(reg, + dst->Register.File, + dst->Register.Index); +} + +static void +scan_register_src(scan_register *reg, + struct tgsi_full_src_register *src) +{ + if (src->Register.Dimension) { + /*FIXME: right now we don't support indirect + * multidimensional addressing */ + debug_assert(!src->Dimension.Indirect); + fill_scan_register2d(reg, + src->Register.File, + src->Register.Index, + src->Dimension.Index); + } else { + fill_scan_register1d(reg, + src->Register.File, + src->Register.Index); + } +} + +static scan_register * +create_scan_register_src(struct tgsi_full_src_register *src) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_src(reg, src); + + return reg; +} + +static scan_register * +create_scan_register_dst(struct tgsi_full_dst_register *dst) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_dst(reg, dst); + + return reg; +} + static void report_error( struct sanity_check_ctx *ctx, @@ -99,12 +179,12 @@ check_file_name( static boolean is_register_declared( struct sanity_check_ctx *ctx, - uint file, - int index ) + const scan_register *reg) { - assert( index >= 0 && index < MAX_REGISTERS ); - - return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; + void *data = cso_hash_find_data_from_template( + ctx->regs_decl, scan_register_key(reg), + (void*)reg, sizeof(scan_register)); + return data ? TRUE : FALSE; } static boolean @@ -112,23 +192,37 @@ is_any_register_declared( struct sanity_check_ctx *ctx, uint file ) { - uint i; + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); - for (i = 0; i < MAX_REG_FLAGS; i++) - if (ctx->regs_decl[file][i]) + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (reg->file == file) return TRUE; + iter = cso_hash_iter_next(iter); + } + return FALSE; } static boolean is_register_used( struct sanity_check_ctx *ctx, - uint file, - int index ) + scan_register *reg) { - assert( index < MAX_REGISTERS ); + void *data = cso_hash_find_data_from_template( + ctx->regs_used, scan_register_key(reg), + reg, sizeof(scan_register)); + return data ? TRUE : FALSE; +} + - return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; +static boolean +is_ind_register_used( + struct sanity_check_ctx *ctx, + scan_register *reg) +{ + return cso_hash_contains(ctx->regs_ind_used, reg->file); } static const char *file_names[TGSI_FILE_COUNT] = @@ -148,31 +242,42 @@ static const char *file_names[TGSI_FILE_COUNT] = static boolean check_register_usage( struct sanity_check_ctx *ctx, - uint file, - int index, + scan_register *reg, const char *name, boolean indirect_access ) { - if (!check_file_name( ctx, file )) + if (!check_file_name( ctx, reg->file )) { + FREE(reg); return FALSE; + } if (indirect_access) { /* Note that 'index' is an offset relative to the value of the - * address register. No range checking done here. - */ - if (!is_any_register_declared( ctx, file )) - report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); - ctx->regs_ind_used[file] = TRUE; + * address register. No range checking done here.*/ + reg->indices[0] = 0; + reg->indices[1] = 0; + if (!is_any_register_declared( ctx, reg->file )) + report_error( ctx, "%s: Undeclared %s register", file_names[reg->file], name ); + if (!is_ind_register_used(ctx, reg)) + cso_hash_insert(ctx->regs_ind_used, reg->file, reg); + else + FREE(reg); } else { - if (index < 0 || index >= MAX_REGISTERS) { - report_error( ctx, "%s[%d]: Invalid %s index", file_names[file], index, name ); - return FALSE; + if (!is_register_declared( ctx, reg )) { + if (reg->dimensions == 2) { + report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], reg->indices[1], name ); + } + else { + report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], name ); + } } - - if (!is_register_declared( ctx, file, index )) - report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); - ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + if (!is_register_used( ctx, reg )) + cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg); + else + FREE(reg); } return TRUE; } @@ -210,35 +315,35 @@ iter_instruction( * Mark the registers as used. */ for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + scan_register *reg = create_scan_register_dst(&inst->Dst[i]); check_register_usage( ctx, - inst->Dst[i].Register.File, - inst->Dst[i].Register.Index, + reg, "destination", FALSE ); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + scan_register *reg = create_scan_register_src(&inst->Src[i]); check_register_usage( ctx, - inst->Src[i].Register.File, - inst->Src[i].Register.Index, + reg, "source", (boolean)inst->Src[i].Register.Indirect ); if (inst->Src[i].Register.Indirect) { - uint file; - int index; + scan_register *ind_reg = MALLOC(sizeof(scan_register)); - file = inst->Src[i].Indirect.File; - index = inst->Src[i].Indirect.Index; + fill_scan_register1d(ind_reg, + inst->Src[i].Indirect.File, + inst->Src[i].Indirect.Index); + if (!(ind_reg->file == TGSI_FILE_ADDRESS || ind_reg->file == TGSI_FILE_LOOP) || + ind_reg->indices[0] != 0) { + report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); + } check_register_usage( ctx, - file, - index, + ind_reg, "indirect", FALSE ); - if (!(file == TGSI_FILE_ADDRESS || file == TGSI_FILE_LOOP) || index != 0) { - report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); - } } } @@ -266,6 +371,19 @@ iter_instruction( return TRUE; } +static void +check_and_declare(struct sanity_check_ctx *ctx, + scan_register *reg) +{ + if (is_register_declared( ctx, reg)) + report_error( ctx, "%s[%u]: The same register declared more than once", + file_names[reg->file], reg->indices[0] ); + cso_hash_insert(ctx->regs_decl, + scan_register_key(reg), + reg); +} + + static boolean iter_declaration( struct tgsi_iterate_context *iter, @@ -287,9 +405,21 @@ iter_declaration( if (!check_file_name( ctx, file )) return TRUE; for (i = decl->Range.First; i <= decl->Range.Last; i++) { - if (is_register_declared( ctx, file, i )) - report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i ); - ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); + /* declared TGSI_FILE_INPUT's for geometry processor + * have an implied second dimension */ + if (file == TGSI_FILE_INPUT && + ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) { + uint vert; + for (vert = 0; vert < ctx->implied_array_size; ++vert) { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register2d(reg, file, vert, i); + check_and_declare(ctx, reg); + } + } else { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register1d(reg, file, i); + check_and_declare(ctx, reg); + } } return TRUE; @@ -301,8 +431,7 @@ iter_immediate( struct tgsi_full_immediate *imm ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - - assert( ctx->num_imms < MAX_REGISTERS ); + scan_register *reg; /* No immediates allowed after the first instruction. */ @@ -311,12 +440,16 @@ iter_immediate( /* Mark the register as declared. */ - ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); + reg = MALLOC(sizeof(scan_register)); + fill_scan_register1d(reg, TGSI_FILE_IMMEDIATE, ctx->num_imms); + cso_hash_insert(ctx->regs_decl, scan_register_key(reg), reg); ctx->num_imms++; /* Check data type validity. */ - if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32 && + imm->Immediate.DataType != TGSI_IMM_UINT32 && + imm->Immediate.DataType != TGSI_IMM_INT32) { report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType ); return TRUE; } @@ -330,8 +463,12 @@ iter_property( struct tgsi_iterate_context *iter, struct tgsi_full_property *prop ) { - /*struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;*/ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY && + prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) { + ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data); + } return TRUE; } @@ -340,7 +477,6 @@ epilog( struct tgsi_iterate_context *iter ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint file; /* There must be an END instruction somewhere. */ @@ -350,13 +486,17 @@ epilog( /* Check if all declared registers were used. */ - for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { - uint i; - - for (i = 0; i < MAX_REGISTERS; i++) { - if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { - report_warning( ctx, "%s[%u]: Register never used", file_names[file], i ); + { + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); + + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) { + report_warning( ctx, "%s[%u]: Register never used", + file_names[reg->file], reg->indices[0] ); } + iter = cso_hash_iter_next(iter); } } @@ -368,6 +508,19 @@ epilog( return TRUE; } +static void +regs_hash_destroy(struct cso_hash *hash) +{ + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + iter = cso_hash_erase(hash, iter); + assert(reg->file < TGSI_FILE_COUNT); + FREE(reg); + } + cso_hash_delete(hash); +} + boolean tgsi_sanity_check( const struct tgsi_token *tokens ) @@ -381,18 +534,23 @@ tgsi_sanity_check( ctx.iter.iterate_property = iter_property; ctx.iter.epilog = epilog; - memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); - memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); - memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); + ctx.regs_decl = cso_hash_create(); + ctx.regs_used = cso_hash_create(); + ctx.regs_ind_used = cso_hash_create(); + ctx.num_imms = 0; ctx.num_instructions = 0; ctx.index_of_END = ~0; ctx.errors = 0; ctx.warnings = 0; + ctx.implied_array_size = 0; if (!tgsi_iterate_shader( tokens, &ctx.iter )) return FALSE; + regs_hash_destroy(ctx.regs_decl); + regs_hash_destroy(ctx.regs_used); + regs_hash_destroy(ctx.regs_ind_used); return ctx.errors == 0; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 0f48b0dc3a1..a6cc773003a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -229,8 +229,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) /* Do a whole bunch of checks for a simple move */ if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || - src->Register.File != TGSI_FILE_INPUT || - src->Register.File != TGSI_FILE_SYSTEM_VALUE || + (src->Register.File != TGSI_FILE_INPUT && + src->Register.File != TGSI_FILE_SYSTEM_VALUE) || dst->Register.File != TGSI_FILE_OUTPUT || src->Register.Index != dst->Register.Index || diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index d63c75dafb3..a85cc4659e0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -1418,13 +1419,13 @@ fetch_texel( struct tgsi_sampler **sampler, sampler, *sampler, store ); - debug_printf("lodbias %f\n", store[12]); - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", + debug_printf("sample %d texcoord %f %f %f lodbias %f\n", j, store[0+j], - store[4+j]); + store[4+j], + store[8 + j], + store[12 + j]); #endif { @@ -1433,7 +1434,8 @@ fetch_texel( struct tgsi_sampler **sampler, &store[0], /* s */ &store[4], /* t */ &store[8], /* r */ - store[12], /* lodbias */ + &store[12], /* lodbias */ + tgsi_sampler_lod_bias, rgba); /* results */ memcpy( store, rgba, 16 * sizeof(float)); @@ -2144,40 +2146,50 @@ emit_instruction( break; case TGSI_OPCODE_XPD: + /* Note: we do all stores after all operands have been fetched + * to avoid src/dst register aliasing issues for an instruction + * such as: XPD TEMP[2].xyz, TEMP[0], TEMP[2]; + */ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 1, 1, CHAN_Z ); - FETCH( func, *inst, 3, 0, CHAN_Z ); + FETCH( func, *inst, 1, 1, CHAN_Z ); /* xmm[1] = src[1].z */ + FETCH( func, *inst, 3, 0, CHAN_Z ); /* xmm[3] = src[0].z */ } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 4, 1, CHAN_Y ); + FETCH( func, *inst, 0, 0, CHAN_Y ); /* xmm[0] = src[0].y */ + FETCH( func, *inst, 4, 1, CHAN_Y ); /* xmm[4] = src[1].y */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_MOV( func, 2, 0 ); - emit_mul( func, 2, 1 ); - emit_MOV( func, 5, 3 ); - emit_mul( func, 5, 4 ); - emit_sub( func, 2, 5 ); - STORE( func, *inst, 2, 0, CHAN_X ); + emit_MOV( func, 7, 0 ); /* xmm[7] = xmm[0] */ + emit_mul( func, 7, 1 ); /* xmm[7] = xmm[2] * xmm[1] */ + emit_MOV( func, 5, 3 ); /* xmm[5] = xmm[3] */ + emit_mul( func, 5, 4 ); /* xmm[5] = xmm[5] * xmm[4] */ + emit_sub( func, 7, 5 ); /* xmm[7] = xmm[2] - xmm[5] */ + /* store xmm[7] in dst.x below */ } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 2, 1, CHAN_X ); - FETCH( func, *inst, 5, 0, CHAN_X ); + FETCH( func, *inst, 2, 1, CHAN_X ); /* xmm[2] = src[1].x */ + FETCH( func, *inst, 5, 0, CHAN_X ); /* xmm[5] = src[0].x */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - emit_mul( func, 3, 2 ); - emit_mul( func, 1, 5 ); - emit_sub( func, 3, 1 ); - STORE( func, *inst, 3, 0, CHAN_Y ); + emit_mul( func, 3, 2 ); /* xmm[3] = xmm[3] * xmm[2] */ + emit_mul( func, 1, 5 ); /* xmm[1] = xmm[1] * xmm[5] */ + emit_sub( func, 3, 1 ); /* xmm[3] = xmm[3] - xmm[1] */ + /* store xmm[3] in dst.y below */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_mul( func, 5, 4 ); - emit_mul( func, 0, 2 ); - emit_sub( func, 5, 0 ); - STORE( func, *inst, 5, 0, CHAN_Z ); + emit_mul( func, 5, 4 ); /* xmm[5] = xmm[5] * xmm[4] */ + emit_mul( func, 0, 2 ); /* xmm[0] = xmm[0] * xmm[2] */ + emit_sub( func, 5, 0 ); /* xmm[5] = xmm[5] - xmm[0] */ + STORE( func, *inst, 5, 0, CHAN_Z ); /* dst.z = xmm[5] */ + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + STORE( func, *inst, 7, 0, CHAN_X ); /* dst.x = xmm[7] */ + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + STORE( func, *inst, 3, 0, CHAN_Y ); /* dst.y = xmm[3] */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { emit_tempf( @@ -2506,7 +2518,7 @@ emit_instruction( break; case TGSI_OPCODE_TXL: - emit_tex( func, inst, TRUE, FALSE ); + return 0; break; case TGSI_OPCODE_TXP: @@ -2578,7 +2590,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: return 0; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index f000958bfc0..9fcffeda368 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -27,7 +27,9 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "pipe/p_defines.h" +#include "pipe/p_inlines.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" @@ -61,18 +63,20 @@ static boolean uprcase( char c ) } /* - * Ignore case of str1 and assume str2 is already uppercase. + * Ignore case of str1 and assume str1 is already uppercase. * Return TRUE iff str1 and str2 are equal. */ static int streq_nocase_uprcase(const char *str1, const char *str2) { - while (*str1 && uprcase(*str1) == *str2) { + while (*str1 && *str2) { + if (*str1 != uprcase(*str2)) + return FALSE; str1++; str2++; } - return *str1 == *str2; + return TRUE; } static boolean str_match_no_case( const char **pcur, const char *str ) @@ -193,11 +197,26 @@ struct translate_ctx struct tgsi_token *tokens_cur; struct tgsi_token *tokens_end; struct tgsi_header *header; + unsigned processor : 4; + int implied_array_size : 5; }; static void report_error( struct translate_ctx *ctx, const char *msg ) { - debug_printf( "\nError: %s", msg ); + int line = 1; + int column = 1; + const char *itr = ctx->text; + + while (itr != ctx->cur) { + if (*itr == '\n') { + column = 1; + ++line; + } + ++column; + ++itr; + } + + debug_printf( "\nTGSI asm error: %s [%d : %d] \n", msg, line, column ); } /* Parse shader header. @@ -229,6 +248,7 @@ static boolean parse_header( struct translate_ctx *ctx ) if (ctx->tokens_cur >= ctx->tokens_end) return FALSE; *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); + ctx->processor = processor; return TRUE; } @@ -325,92 +345,36 @@ parse_opt_writemask( return TRUE; } -/* <register_file_bracket> ::= <file> `[' - */ static boolean -parse_register_file_bracket( - struct translate_ctx *ctx, - uint *file ) -{ - if (!parse_file( &ctx->cur, file )) { - report_error( ctx, "Unknown register file" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '[') { - report_error( ctx, "Expected `['" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} +parse_register_dst( struct translate_ctx *ctx, + uint *file, + int *index ); -/* <register_file_bracket_index> ::= <register_file_bracket> <uint> - */ -static boolean -parse_register_file_bracket_index( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - uint uindex; +struct parsed_src_bracket { + int index; - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - return TRUE; -} + uint ind_file; + int ind_index; + uint ind_comp; +}; -/* Parse destination register operand. - * <register_dst> ::= <register_file_bracket_index> `]' - */ -static boolean -parse_register_dst( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - if (!parse_register_file_bracket_index( ctx, file, index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} -/* Parse source register operand. - * <register_src> ::= <register_file_bracket_index> `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]' - */ static boolean -parse_register_src( +parse_register_src_bracket( struct translate_ctx *ctx, - uint *file, - int *index, - uint *ind_file, - int *ind_index, - uint *ind_comp) + struct parsed_src_bracket *brackets) { const char *cur; uint uindex; - *ind_comp = TGSI_SWIZZLE_X; - if (!parse_register_file_bracket( ctx, file )) - return FALSE; + memset(brackets, 0, sizeof(struct parsed_src_bracket)); + eat_opt_white( &ctx->cur ); + cur = ctx->cur; - if (parse_file( &cur, ind_file )) { - if (!parse_register_dst( ctx, ind_file, ind_index )) + if (parse_file( &cur, &brackets->ind_file )) { + if (!parse_register_dst( ctx, &brackets->ind_file, + &brackets->ind_index )) return FALSE; eat_opt_white( &ctx->cur ); @@ -420,16 +384,16 @@ parse_register_src( switch (uprcase(*ctx->cur)) { case 'X': - *ind_comp = TGSI_SWIZZLE_X; + brackets->ind_comp = TGSI_SWIZZLE_X; break; case 'Y': - *ind_comp = TGSI_SWIZZLE_Y; + brackets->ind_comp = TGSI_SWIZZLE_Y; break; case 'Z': - *ind_comp = TGSI_SWIZZLE_Z; + brackets->ind_comp = TGSI_SWIZZLE_Z; break; case 'W': - *ind_comp = TGSI_SWIZZLE_W; + brackets->ind_comp = TGSI_SWIZZLE_W; break; default: report_error(ctx, "Expected indirect register swizzle component `x', `y', `z' or `w'"); @@ -450,12 +414,12 @@ parse_register_src( return FALSE; } if (negate) - *index = -(int) uindex; + brackets->index = -(int) uindex; else - *index = (int) uindex; + brackets->index = (int) uindex; } else { - *index = 0; + brackets->index = 0; } } else { @@ -463,9 +427,9 @@ parse_register_src( report_error( ctx, "Expected literal unsigned integer" ); return FALSE; } - *index = (int) uindex; - *ind_file = TGSI_FILE_NULL; - *ind_index = 0; + brackets->index = (int) uindex; + brackets->ind_file = TGSI_FILE_NULL; + brackets->ind_index = 0; } eat_opt_white( &ctx->cur ); if (*ctx->cur != ']') { @@ -476,20 +440,123 @@ parse_register_src( return TRUE; } -/* Parse register declaration. - * <register_dcl> ::= <register_file_bracket_index> `]' | - * <register_file_bracket_index> `..' <index> `]' +static boolean +parse_opt_register_src_bracket( + struct translate_ctx *ctx, + struct parsed_src_bracket *brackets, + int *parsed_brackets) +{ + const char *cur = ctx->cur; + + *parsed_brackets = 0; + + eat_opt_white( &cur ); + if (cur[0] == '[') { + ++cur; + ctx->cur = cur; + + if (!parse_register_src_bracket(ctx, brackets)) + return FALSE; + + *parsed_brackets = 1; + } + + return TRUE; +} + +/* <register_file_bracket> ::= <file> `[' */ static boolean -parse_register_dcl( +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* <register_file_bracket_index> ::= <register_file_bracket> <uint> + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse source register operand. + * <register_src> ::= <register_file_bracket_index> `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]' + */ +static boolean +parse_register_src( struct translate_ctx *ctx, uint *file, - int *first, - int *last ) + struct parsed_src_bracket *brackets) +{ + + brackets->ind_comp = TGSI_SWIZZLE_X; + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + if (!parse_register_src_bracket( ctx, brackets )) + return FALSE; + + return TRUE; +} + +struct parsed_dcl_bracket { + uint first; + uint last; +}; + +static boolean +parse_register_dcl_bracket( + struct translate_ctx *ctx, + struct parsed_dcl_bracket *bracket) { - if (!parse_register_file_bracket_index( ctx, file, first )) + uint uindex; + memset(bracket, 0, sizeof(struct parsed_dcl_bracket)); + + eat_opt_white( &ctx->cur ); + + if (!parse_uint( &ctx->cur, &uindex )) { + /* it can be an empty bracket [] which means its range + * is from 0 to some implied size */ + if (ctx->cur[0] == ']' && ctx->implied_array_size != 0) { + bracket->first = 0; + bracket->last = ctx->implied_array_size - 1; + goto cleanup; + } + report_error( ctx, "Expected literal unsigned integer" ); return FALSE; + } + bracket->first = (int) uindex; + eat_opt_white( &ctx->cur ); + if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { uint uindex; @@ -499,12 +566,14 @@ parse_register_dcl( report_error( ctx, "Expected literal integer" ); return FALSE; } - *last = (int) uindex; + bracket->last = (int) uindex; eat_opt_white( &ctx->cur ); } else { - *last = *first; + bracket->last = bracket->first; } + +cleanup: if (*ctx->cur != ']') { report_error( ctx, "Expected `]' or `..'" ); return FALSE; @@ -513,6 +582,70 @@ parse_register_dcl( return TRUE; } +/* Parse register declaration. + * <register_dcl> ::= <register_file_bracket_index> `]' | + * <register_file_bracket_index> `..' <index> `]' + */ +static boolean +parse_register_dcl( + struct translate_ctx *ctx, + uint *file, + struct parsed_dcl_bracket *brackets, + int *num_brackets) +{ + const char *cur; + + *num_brackets = 0; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + if (!parse_register_dcl_bracket( ctx, &brackets[0] )) + return FALSE; + + *num_brackets = 1; + + cur = ctx->cur; + eat_opt_white( &cur ); + + if (cur[0] == '[') { + ++cur; + ctx->cur = cur; + if (!parse_register_dcl_bracket( ctx, &brackets[1] )) + return FALSE; + /* for geometry shader we don't really care about + * the first brackets it's always the size of the + * input primitive. so we want to declare just + * the index relevant to the semantics which is in + * the second bracket */ + if (ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + brackets[0] = brackets[1]; + } + *num_brackets = 2; + } + + return TRUE; +} + + +/* Parse destination register operand. + * <register_dst> ::= <register_file_bracket_index> `]' + */ +static boolean +parse_register_dst( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} static boolean parse_dst_operand( @@ -582,37 +715,44 @@ parse_src_operand( struct tgsi_full_src_register *src ) { uint file; - int index; - uint ind_file; - int ind_index; - uint ind_comp; uint swizzle[4]; boolean parsed_swizzle; + struct parsed_src_bracket bracket[2]; + int parsed_opt_brackets; if (*ctx->cur == '-') { ctx->cur++; eat_opt_white( &ctx->cur ); src->Register.Negate = 1; } - + if (*ctx->cur == '|') { ctx->cur++; eat_opt_white( &ctx->cur ); src->Register.Absolute = 1; } - if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp)) + if (!parse_register_src(ctx, &file, &bracket[0])) + return FALSE; + if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets)) return FALSE; + src->Register.File = file; - src->Register.Index = index; - if (ind_file != TGSI_FILE_NULL) { + src->Register.Index = bracket[0].index; + if (bracket[0].ind_file != TGSI_FILE_NULL) { src->Register.Indirect = 1; - src->Indirect.File = ind_file; - src->Indirect.Index = ind_index; - src->Indirect.SwizzleX = ind_comp; - src->Indirect.SwizzleY = ind_comp; - src->Indirect.SwizzleZ = ind_comp; - src->Indirect.SwizzleW = ind_comp; + src->Indirect.File = bracket[0].ind_file; + src->Indirect.Index = bracket[0].ind_index; + src->Indirect.SwizzleX = bracket[0].ind_comp; + src->Indirect.SwizzleY = bracket[0].ind_comp; + src->Indirect.SwizzleZ = bracket[0].ind_comp; + src->Indirect.SwizzleW = bracket[0].ind_comp; + } + if (parsed_opt_brackets) { + src->Register.Dimension = 1; + src->Dimension.Indirect = 0; + src->Dimension.Dimension = 0; + src->Dimension.Index = bracket[1].index; } /* Parse optional swizzle. @@ -791,7 +931,9 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] = "PSIZE", "GENERIC", "NORMAL", - "FACE" + "FACE", + "EDGEFLAG", + "PRIM_ID" }; static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = @@ -805,8 +947,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) { struct tgsi_full_declaration decl; uint file; - int first; - int last; + struct parsed_dcl_bracket brackets[2]; + int num_brackets; uint writemask; const char *cur; uint advance; @@ -818,7 +960,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - if (!parse_register_dcl( ctx, &file, &first, &last )) + if (!parse_register_dcl( ctx, &file, brackets, &num_brackets)) return FALSE; if (!parse_opt_writemask( ctx, &writemask )) return FALSE; @@ -826,8 +968,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl = tgsi_default_full_declaration(); decl.Declaration.File = file; decl.Declaration.UsageMask = writemask; - decl.Range.First = first; - decl.Range.Last = last; + decl.Range.First = brackets[0].first; + decl.Range.Last = brackets[0].last; cur = ctx->cur; eat_opt_white( &cur ); @@ -1027,7 +1169,7 @@ static boolean parse_property( struct translate_ctx *ctx ) } for (property_name = 0; property_name < TGSI_PROPERTY_COUNT; ++property_name) { - if (streq_nocase_uprcase(id, property_names[property_name])) { + if (streq_nocase_uprcase(property_names[property_name], id)) { break; } } @@ -1044,6 +1186,10 @@ static boolean parse_property( struct translate_ctx *ctx ) report_error( ctx, "Unknown primitive name as property!" ); return FALSE; } + if (property_name == TGSI_PROPERTY_GS_INPUT_PRIM && + ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + ctx->implied_array_size = u_vertices_per_prim(values[0]); + } break; default: if (!parse_uint(&ctx->cur, &values[0] )) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 1e730e53427..2b51672b8ea 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -64,6 +64,7 @@ struct ureg_tokens { }; #define UREG_MAX_INPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 32 @@ -95,14 +96,26 @@ struct ureg_program unsigned nr_gs_inputs; struct { + unsigned index; + unsigned semantic_name; + unsigned semantic_index; + } system_value[UREG_MAX_SYSTEM_VALUE]; + unsigned nr_system_values; + + struct { unsigned semantic_name; unsigned semantic_index; } output[UREG_MAX_OUTPUT]; unsigned nr_outputs; struct { - float v[4]; + union { + float f[4]; + unsigned u[4]; + int i[4]; + } value; unsigned nr; + unsigned type; } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; @@ -299,6 +312,25 @@ ureg_DECL_gs_input(struct ureg_program *ureg, } +struct ureg_src +ureg_DECL_system_value(struct ureg_program *ureg, + unsigned index, + unsigned semantic_name, + unsigned semantic_index) +{ + if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { + ureg->system_value[ureg->nr_system_values].index = index; + ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; + ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; + ureg->nr_system_values++; + } else { + set_bad(ureg); + } + + return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); +} + + struct ureg_dst ureg_DECL_output( struct ureg_program *ureg, unsigned name, @@ -486,22 +518,22 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, } - - -static int match_or_expand_immediate( const float *v, - unsigned nr, - float *v2, - unsigned *nr2, - unsigned *swizzle ) +static int +match_or_expand_immediate( const unsigned *v, + unsigned nr, + unsigned *v2, + unsigned *pnr2, + unsigned *swizzle ) { + unsigned nr2 = *pnr2; unsigned i, j; - + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; - for (j = 0; j < *nr2 && !found; j++) { + for (j = 0; j < nr2 && !found; j++) { if (v[i] == v2[j]) { *swizzle |= j << (i * 2); found = TRUE; @@ -509,24 +541,28 @@ static int match_or_expand_immediate( const float *v, } if (!found) { - if (*nr2 >= 4) + if (nr2 >= 4) { return FALSE; + } - v2[*nr2] = v[i]; - *swizzle |= *nr2 << (i * 2); - (*nr2)++; + v2[nr2] = v[i]; + *swizzle |= nr2 << (i * 2); + nr2++; } } + /* Actually expand immediate only when fully succeeded. + */ + *pnr2 = nr2; return TRUE; } - - -struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, - const float *v, - unsigned nr ) +static struct ureg_src +decl_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned nr, + unsigned type ) { unsigned i, j; unsigned swizzle; @@ -536,38 +572,82 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, */ for (i = 0; i < ureg->nr_immediates; i++) { - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + if (ureg->immediate[i].type != type) { + continue; + } + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { i = ureg->nr_immediates++; - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + ureg->immediate[i].type = type; + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } - set_bad( ureg ); + set_bad(ureg); out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. */ - for (j = nr; j < 4; j++) + for (j = nr; j < 4; j++) { swizzle |= (swizzle & 0x3) << (j * 2); + } - return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), - (swizzle >> 0) & 0x3, - (swizzle >> 2) & 0x3, - (swizzle >> 4) & 0x3, - (swizzle >> 6) & 0x3); + return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), + (swizzle >> 0) & 0x3, + (swizzle >> 2) & 0x3, + (swizzle >> 4) & 0x3, + (swizzle >> 6) & 0x3); +} + + +struct ureg_src +ureg_DECL_immediate( struct ureg_program *ureg, + const float *v, + unsigned nr ) +{ + union { + float f[4]; + unsigned u[4]; + } fu; + unsigned int i; + + for (i = 0; i < nr; i++) { + fu.f[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); +} + + +struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32); +} + + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *ureg, + const int *v, + unsigned nr ) +{ + return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } @@ -791,8 +871,8 @@ ureg_insn(struct ureg_program *ureg, unsigned i; boolean saturate; boolean predicate; - boolean negate; - unsigned swizzle[4]; + boolean negate = FALSE; + unsigned swizzle[4] = { 0 }; saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; @@ -838,8 +918,8 @@ ureg_tex_insn(struct ureg_program *ureg, unsigned i; boolean saturate; boolean predicate; - boolean negate; - unsigned swizzle[4]; + boolean negate = FALSE; + unsigned swizzle[4] = { 0 }; saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; @@ -955,21 +1035,23 @@ static void emit_decl_range( struct ureg_program *ureg, out[1].decl_range.Last = first + count - 1; } -static void emit_immediate( struct ureg_program *ureg, - const float *v ) +static void +emit_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned type ) { union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); out[0].value = 0; out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; out[0].imm.NrTokens = 5; - out[0].imm.DataType = TGSI_IMM_FLOAT32; + out[0].imm.DataType = type; out[0].imm.Padding = 0; - out[1].imm_data.Float = v[0]; - out[2].imm_data.Float = v[1]; - out[3].imm_data.Float = v[2]; - out[4].imm_data.Float = v[3]; + out[1].imm_data.Uint = v[0]; + out[2].imm_data.Uint = v[1]; + out[3].imm_data.Uint = v[2]; + out[4].imm_data.Uint = v[3]; } @@ -1003,6 +1085,15 @@ static void emit_decls( struct ureg_program *ureg ) } } + for (i = 0; i < ureg->nr_system_values; i++) { + emit_decl(ureg, + TGSI_FILE_SYSTEM_VALUE, + ureg->system_value[i].index, + ureg->system_value[i].semantic_name, + ureg->system_value[i].semantic_index, + TGSI_INTERPOLATE_CONSTANT); + } + for (i = 0; i < ureg->nr_outputs; i++) { emit_decl( ureg, TGSI_FILE_OUTPUT, @@ -1055,7 +1146,8 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, - ureg->immediate[i].v ); + ureg->immediate[i].value.u, + ureg->immediate[i].type ); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 7e3e7bcf1d3..38e2fd8d0a9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -137,6 +137,12 @@ struct ureg_src ureg_DECL_gs_input(struct ureg_program *, unsigned index); +struct ureg_src +ureg_DECL_system_value(struct ureg_program *, + unsigned index, + unsigned semantic_name, + unsigned semantic_index); + struct ureg_dst ureg_DECL_output( struct ureg_program *, unsigned semantic_name, @@ -148,6 +154,16 @@ ureg_DECL_immediate( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *, + const int *v, + unsigned nr ); + +struct ureg_src ureg_DECL_constant( struct ureg_program *, unsigned index ); @@ -221,6 +237,90 @@ ureg_imm1f( struct ureg_program *ureg, return ureg_DECL_immediate( ureg, v, 1 ); } +static INLINE struct ureg_src +ureg_imm4u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c, unsigned d) +{ + unsigned v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_uint( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c) +{ + unsigned v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_uint( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2u( struct ureg_program *ureg, + unsigned a, unsigned b) +{ + unsigned v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_uint( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1u( struct ureg_program *ureg, + unsigned a) +{ + return ureg_DECL_immediate_uint( ureg, &a, 1 ); +} + +static INLINE struct ureg_src +ureg_imm4i( struct ureg_program *ureg, + int a, int b, + int c, int d) +{ + int v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_int( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3i( struct ureg_program *ureg, + int a, int b, + int c) +{ + int v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_int( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2i( struct ureg_program *ureg, + int a, int b) +{ + int v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_int( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1i( struct ureg_program *ureg, + int a) +{ + return ureg_DECL_immediate_int( ureg, &a, 1 ); +} + /*********************************************************************** * Functions for patching up labels */ diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile deleted file mode 100644 index 3c82f8ae037..00000000000 --- a/src/gallium/auxiliary/translate/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = translate - -C_SOURCES = \ - translate_generic.c \ - translate_sse.c \ - translate.c \ - translate_cache.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript deleted file mode 100644 index 9553a675372..00000000000 --- a/src/gallium/auxiliary/translate/SConscript +++ /dev/null @@ -1,12 +0,0 @@ -Import('*') - -translate = env.ConvenienceLibrary( - target = 'translate', - source = [ - 'translate_generic.c', - 'translate_sse.c', - 'translate.c', - 'translate_cache.c', - ]) - -auxiliaries.insert(0, translate) diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 34526eb0617..54ed2c1a4be 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -44,12 +44,19 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" +enum translate_element_type { + TRANSLATE_ELEMENT_NORMAL, + TRANSLATE_ELEMENT_INSTANCE_ID +}; + struct translate_element { + enum translate_element_type type; enum pipe_format input_format; enum pipe_format output_format; unsigned input_buffer:8; unsigned input_offset:24; + unsigned instance_divisor; unsigned output_offset; }; @@ -74,11 +81,13 @@ struct translate { void (PIPE_CDECL *run_elts)( struct translate *, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer); void (PIPE_CDECL *run)( struct translate *, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer); }; @@ -103,8 +112,13 @@ static INLINE int translate_keysize( const struct translate_key *key ) static INLINE int translate_key_compare( const struct translate_key *a, const struct translate_key *b ) { - int keysize = translate_keysize(a); - return memcmp(a, b, keysize); + int keysize_a = translate_keysize(a); + int keysize_b = translate_keysize(b); + + if (keysize_a != keysize_b) { + return keysize_a - keysize_b; + } + return memcmp(a, b, keysize_a); } diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 266e7ee81e6..24727d49888 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -46,9 +46,12 @@ struct translate_generic { struct translate translate; struct { + enum translate_element_type type; + fetch_func fetch; unsigned buffer; unsigned input_offset; + unsigned instance_divisor; emit_func emit; unsigned output_offset; @@ -568,6 +571,7 @@ static emit_func get_emit_func( enum pipe_format format ) static void PIPE_CDECL generic_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -583,13 +587,20 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); + const char *src; char *dst = (vert + tg->attrib[attr].output_offset); + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + tg->attrib[attr].fetch( src, data ); if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", @@ -607,6 +618,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, static void PIPE_CDECL generic_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -623,13 +635,25 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); - char *dst = (vert + tg->attrib[attr].output_offset); - tg->attrib[attr].fetch( src, data ); + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const char *src; + + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + + tg->attrib[attr].fetch( src, data ); + } else { + data[0] = (float)instance_id; + } if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", i, attr, data[0], data[1], data[2], data[3]); @@ -683,10 +707,12 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->translate.run = generic_run; for (i = 0; i < key->nr_elements; i++) { + tg->attrib[i].type = key->element[i].type; tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); tg->attrib[i].buffer = key->element[i].input_buffer; tg->attrib[i].input_offset = key->element[i].input_offset; + tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); tg->attrib[i].output_offset = key->element[i].output_offset; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index b62db8d8f33..c13e7427387 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -49,19 +49,29 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, unsigned start, unsigned count, - void *output_buffer ); + unsigned instance_id, + void *output_buffer); typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, const unsigned *elts, unsigned count, - void *output_buffer ); + unsigned instance_id, + void *output_buffer); struct translate_buffer { const void *base_ptr; unsigned stride; - void *ptr; /* updated per vertex */ }; +struct translate_buffer_varient { + unsigned buffer_index; + unsigned instance_divisor; + void *ptr; /* updated either per vertex or per instance */ +}; + + +#define ELEMENT_BUFFER_INSTANCE_ID 1001 + struct translate_sse { struct translate translate; @@ -81,6 +91,16 @@ struct translate_sse { struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; unsigned nr_buffers; + /* Multiple buffer varients can map to a single buffer. */ + struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS]; + unsigned nr_buffer_varients; + + /* Multiple elements can map to a single buffer varient. */ + unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS]; + + boolean use_instancing; + unsigned instance_id; + run_func gen_run; run_elts_func gen_run_elts; @@ -359,32 +379,61 @@ static boolean init_inputs( struct translate_sse *p, boolean linear ) { unsigned i; - if (linear) { - for (i = 0; i < p->nr_buffers; i++) { + struct x86_reg instance_id = x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); + + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; + struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; + + if (linear || varient->instance_divisor) { struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &buffer->stride)); struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].base_ptr)); + get_offset(p, &buffer->base_ptr)); struct x86_reg elt = p->idx_EBX; - struct x86_reg tmp = p->tmp_EAX; - + struct x86_reg tmp_EAX = p->tmp_EAX; /* Calculate pointer to first attrib: + * base_ptr + stride * index, where index depends on instance divisor */ - x86_mov(p->func, tmp, buf_stride); - x86_imul(p->func, tmp, elt); - x86_add(p->func, tmp, buf_base_ptr); + if (varient->instance_divisor) { + /* Our index is instance ID divided by instance divisor. + */ + x86_mov(p->func, tmp_EAX, instance_id); + + if (varient->instance_divisor != 1) { + struct x86_reg tmp_EDX = p->machine_EDX; + struct x86_reg tmp_ECX = p->outbuf_ECX; + + /* TODO: Add x86_shr() to rtasm and use it whenever + * instance divisor is power of two. + */ + + x86_push(p->func, tmp_EDX); + x86_push(p->func, tmp_ECX); + x86_xor(p->func, tmp_EDX, tmp_EDX); + x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor); + x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ + x86_pop(p->func, tmp_ECX); + x86_pop(p->func, tmp_EDX); + } + } else { + x86_mov(p->func, tmp_EAX, elt); + } + x86_imul(p->func, tmp_EAX, buf_stride); + x86_add(p->func, tmp_EAX, buf_base_ptr); /* In the linear case, keep the buffer pointer instead of the * index number. */ - if (p->nr_buffers == 1) - x86_mov( p->func, elt, tmp ); + if (linear && p->nr_buffer_varients == 1) + x86_mov(p->func, elt, tmp_EAX); else - x86_mov( p->func, buf_ptr, tmp ); + x86_mov(p->func, buf_ptr, tmp_EAX); } } @@ -394,31 +443,36 @@ static boolean init_inputs( struct translate_sse *p, static struct x86_reg get_buffer_ptr( struct translate_sse *p, boolean linear, - unsigned buf_idx, + unsigned var_idx, struct x86_reg elt ) { - if (linear && p->nr_buffers == 1) { + if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { + return x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); + } + if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; } - else if (linear) { + else if (linear || p->buffer_varient[var_idx].instance_divisor) { struct x86_reg ptr = p->tmp_EAX; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].ptr)); + get_offset(p, &p->buffer_varient[var_idx].ptr)); x86_mov(p->func, ptr, buf_ptr); return ptr; } else { struct x86_reg ptr = p->tmp_EAX; + const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx]; struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].base_ptr)); + get_offset(p, &p->buffer[varient->buffer_index].base_ptr)); @@ -436,28 +490,33 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, static boolean incr_inputs( struct translate_sse *p, boolean linear ) { - if (linear && p->nr_buffers == 1) { + if (linear && p->nr_buffer_varients == 1) { struct x86_reg stride = x86_make_disp(p->machine_EDX, get_offset(p, &p->buffer[0].stride)); - x86_add(p->func, p->idx_EBX, stride); - sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + if (p->buffer_varient[0].instance_divisor == 0) { + x86_add(p->func, p->idx_EBX, stride); + sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + } } else if (linear) { unsigned i; /* Is this worthwhile?? */ - for (i = 0; i < p->nr_buffers; i++) { + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); - x86_mov(p->func, p->tmp_EAX, buf_ptr); - x86_add(p->func, p->tmp_EAX, buf_stride); - if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); - x86_mov(p->func, buf_ptr, p->tmp_EAX); + if (varient->instance_divisor == 0) { + x86_mov(p->func, p->tmp_EAX, buf_ptr); + x86_add(p->func, p->tmp_EAX, buf_stride); + if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); + x86_mov(p->func, buf_ptr, p->tmp_EAX); + } } } else { @@ -514,7 +573,18 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1)); x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2)); x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3)); - x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 4)); + x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5)); + + /* Load instance ID. + */ + if (p->use_instancing) { + x86_mov(p->func, + p->tmp_EAX, + x86_fn_arg(p->func, 4)); + x86_mov(p->func, + x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), + p->tmp_EAX); + } /* Get vertex count, compare to zero */ @@ -531,17 +601,18 @@ static boolean build_vertex_emit( struct translate_sse *p, label = x86_get_label(p->func); { struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX); - int last_vb = -1; + int last_varient = -1; struct x86_reg vb; for (j = 0; j < p->translate.key.nr_elements; j++) { const struct translate_element *a = &p->translate.key.element[j]; + unsigned varient = p->element_to_buffer_varient[j]; /* Figure out source pointer address: */ - if (a->input_buffer != last_vb) { - last_vb = a->input_buffer; - vb = get_buffer_ptr(p, linear, a->input_buffer, elt); + if (varient != last_varient) { + last_varient = varient; + vb = get_buffer_ptr(p, linear, varient, elt); } if (!translate_attr( p, a, @@ -624,6 +695,7 @@ static void translate_sse_release( struct translate *translate ) static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; @@ -631,12 +703,14 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, p->gen_run_elts( translate, elts, count, - output_buffer ); + instance_id, + output_buffer); } static void PIPE_CDECL translate_sse_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; @@ -644,7 +718,8 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, p->gen_run( translate, start, count, - output_buffer ); + instance_id, + output_buffer); } @@ -666,8 +741,37 @@ struct translate *translate_sse2_create( const struct translate_key *key ) p->translate.run_elts = translate_sse_run_elts; p->translate.run = translate_sse_run; - for (i = 0; i < key->nr_elements; i++) - p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 ); + for (i = 0; i < key->nr_elements; i++) { + if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) { + unsigned j; + + p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); + + if (key->element[i].instance_divisor) { + p->use_instancing = TRUE; + } + + /* + * Map vertex element to vertex buffer varient. + */ + for (j = 0; j < p->nr_buffer_varients; j++) { + if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && + p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { + break; + } + } + if (j == p->nr_buffer_varients) { + p->buffer_varient[j].buffer_index = key->element[i].input_buffer; + p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; + p->nr_buffer_varients++; + } + p->element_to_buffer_varient[i] = j; + } else { + assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); + + p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; + } + } if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile deleted file mode 100644 index 3ed90fd1b70..00000000000 --- a/src/gallium/auxiliary/util/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = util - -C_SOURCES = \ - u_debug.c \ - u_debug_dump.c \ - u_debug_symbol.c \ - u_debug_stack.c \ - u_blit.c \ - u_blitter.c \ - u_cache.c \ - u_cpu_detect.c \ - u_dl.c \ - u_draw_quad.c \ - u_format.c \ - u_format_access.c \ - u_format_table.c \ - u_gen_mipmap.c \ - u_handle_table.c \ - u_hash_table.c \ - u_hash.c \ - u_keymap.c \ - u_linear.c \ - u_network.c \ - u_math.c \ - u_mm.c \ - u_rect.c \ - u_simple_shaders.c \ - u_snprintf.c \ - u_stream_stdc.c \ - u_stream_wd.c \ - u_surface.c \ - u_texture.c \ - u_tile.c \ - u_time.c \ - u_timed_winsys.c \ - u_upload_mgr.c \ - u_simple_screen.c - -include ../../Makefile.template - -u_format_table.c: u_format_table.py u_format_parse.py u_format.csv - python u_format_table.py u_format.csv > $@ - -u_format_access.c: u_format_access.py u_format_parse.py u_format.csv - python u_format_access.py u_format.csv > $@ diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript deleted file mode 100644 index 2a546d19dc0..00000000000 --- a/src/gallium/auxiliary/util/SConscript +++ /dev/null @@ -1,61 +0,0 @@ -Import('*') - -env.Clone() - -env.Append(CPPPATH = ['.']) - -env.CodeGenerate( - target = 'u_format_table.c', - script = 'u_format_table.py', - source = ['u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' -) - -env.CodeGenerate( - target = 'u_format_access.c', - script = 'u_format_access.py', - source = ['u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' -) - -util = env.ConvenienceLibrary( - target = 'util', - source = [ - 'u_bitmask.c', - 'u_blit.c', - 'u_blitter.c', - 'u_cache.c', - 'u_cpu_detect.c', - 'u_debug.c', - 'u_debug_dump.c', - 'u_debug_memory.c', - 'u_debug_stack.c', - 'u_debug_symbol.c', - 'u_dl.c', - 'u_draw_quad.c', - 'u_format.c', - 'u_format_access.c', - 'u_format_table.c', - 'u_gen_mipmap.c', - 'u_handle_table.c', - 'u_hash.c', - 'u_hash_table.c', - 'u_keymap.c', - 'u_network.c', - 'u_math.c', - 'u_mm.c', - 'u_rect.c', - 'u_simple_shaders.c', - 'u_snprintf.c', - 'u_stream_stdc.c', - 'u_stream_wd.c', - 'u_surface.c', - 'u_texture.c', - 'u_tile.c', - 'u_time.c', - 'u_timed_winsys.c', - 'u_upload_mgr.c', - 'u_simple_screen.c', - ]) - -auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/u_bitmask.c b/src/gallium/auxiliary/util/u_bitmask.c index 77587c07ec0..23c93a3ebcb 100644 --- a/src/gallium/auxiliary/util/u_bitmask.c +++ b/src/gallium/auxiliary/util/u_bitmask.c @@ -97,12 +97,12 @@ util_bitmask_resize(struct util_bitmask *bm, if(!minimum_size) return FALSE; - if(bm->size > minimum_size) + if(bm->size >= minimum_size) return TRUE; assert(bm->size % UTIL_BITMASK_BITS_PER_WORD == 0); new_size = bm->size; - while(!(new_size > minimum_size)) { + while(new_size < minimum_size) { new_size *= 2; /* Check integer overflow */ if(new_size < bm->size) @@ -136,7 +136,7 @@ util_bitmask_filled_set(struct util_bitmask *bm, unsigned index) { assert(bm->filled <= bm->size); - assert(index <= bm->size); + assert(index < bm->size); if(index == bm->filled) { ++bm->filled; @@ -149,7 +149,7 @@ util_bitmask_filled_unset(struct util_bitmask *bm, unsigned index) { assert(bm->filled <= bm->size); - assert(index <= bm->size); + assert(index < bm->size); if(index < bm->filled) bm->filled = index; @@ -182,7 +182,7 @@ util_bitmask_add(struct util_bitmask *bm) mask = 1; } found: - + /* grow the bitmask if necessary */ if(!util_bitmask_resize(bm, bm->filled)) return UTIL_BITMASK_INVALID_INDEX; @@ -198,9 +198,9 @@ unsigned util_bitmask_set(struct util_bitmask *bm, unsigned index) { - unsigned word = index / UTIL_BITMASK_BITS_PER_WORD; - unsigned bit = index % UTIL_BITMASK_BITS_PER_WORD; - util_bitmask_word mask = 1 << bit; + unsigned word; + unsigned bit; + util_bitmask_word mask; assert(bm); @@ -208,6 +208,10 @@ util_bitmask_set(struct util_bitmask *bm, if(!util_bitmask_resize(bm, index)) return UTIL_BITMASK_INVALID_INDEX; + word = index / UTIL_BITMASK_BITS_PER_WORD; + bit = index % UTIL_BITMASK_BITS_PER_WORD; + mask = 1 << bit; + bm->words[word] |= mask; util_bitmask_filled_set(bm, index); @@ -220,15 +224,19 @@ void util_bitmask_clear(struct util_bitmask *bm, unsigned index) { - unsigned word = index / UTIL_BITMASK_BITS_PER_WORD; - unsigned bit = index % UTIL_BITMASK_BITS_PER_WORD; - util_bitmask_word mask = 1 << bit; + unsigned word; + unsigned bit; + util_bitmask_word mask; assert(bm); if(index >= bm->size) return; + word = index / UTIL_BITMASK_BITS_PER_WORD; + bit = index % UTIL_BITMASK_BITS_PER_WORD; + mask = 1 << bit; + bm->words[word] &= ~mask; util_bitmask_filled_unset(bm, index); @@ -250,7 +258,7 @@ util_bitmask_get(struct util_bitmask *bm, return TRUE; } - if(index > bm->size) + if(index >= bm->size) return FALSE; if(bm->words[word] & mask) { diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 1f794d39a17..249a0375fc5 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -48,6 +48,8 @@ #include "util/u_simple_shaders.h" #include "util/u_texture.h" +#define INVALID_PTR ((void*)~0) + struct blitter_context_priv { struct blitter_context blitter; @@ -110,6 +112,11 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->pipe = pipe; /* init state objects for them to be considered invalid */ + ctx->blitter.saved_blend_state = INVALID_PTR; + ctx->blitter.saved_dsa_state = INVALID_PTR; + ctx->blitter.saved_rs_state = INVALID_PTR; + ctx->blitter.saved_fs = INVALID_PTR; + ctx->blitter.saved_vs = INVALID_PTR; ctx->blitter.saved_fb_state.nr_cbufs = ~0; ctx->blitter.saved_num_textures = ~0; ctx->blitter.saved_num_sampler_states = ~0; @@ -156,6 +163,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) rs_state.cull_mode = PIPE_WINDING_NONE; rs_state.bypass_vs_clip_and_viewport = 1; rs_state.gl_rasterization_rules = 1; + rs_state.flatshade = 1; ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state); /* fragment shaders are created on-demand */ @@ -234,11 +242,11 @@ void util_blitter_destroy(struct blitter_context *blitter) static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) { /* make sure these CSOs have been saved */ - assert(ctx->blitter.saved_blend_state && - ctx->blitter.saved_dsa_state && - ctx->blitter.saved_rs_state && - ctx->blitter.saved_fs && - ctx->blitter.saved_vs); + assert(ctx->blitter.saved_blend_state != INVALID_PTR && + ctx->blitter.saved_dsa_state != INVALID_PTR && + ctx->blitter.saved_rs_state != INVALID_PTR && + ctx->blitter.saved_fs != INVALID_PTR && + ctx->blitter.saved_vs != INVALID_PTR); } static void blitter_restore_CSOs(struct blitter_context_priv *ctx) @@ -252,11 +260,11 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) pipe->bind_fs_state(pipe, ctx->blitter.saved_fs); pipe->bind_vs_state(pipe, ctx->blitter.saved_vs); - ctx->blitter.saved_blend_state = 0; - ctx->blitter.saved_dsa_state = 0; - ctx->blitter.saved_rs_state = 0; - ctx->blitter.saved_fs = 0; - ctx->blitter.saved_vs = 0; + ctx->blitter.saved_blend_state = INVALID_PTR; + ctx->blitter.saved_dsa_state = INVALID_PTR; + ctx->blitter.saved_rs_state = INVALID_PTR; + ctx->blitter.saved_fs = INVALID_PTR; + ctx->blitter.saved_vs = INVALID_PTR; /* restore the state objects which are required to be saved before copy/fill */ @@ -371,9 +379,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, float t1 = y1 / (float)surf->height; float s2 = x2 / (float)surf->width; float t2 = y2 / (float)surf->height; - const float st[4][2] = { - {s1, t1}, {s2, t1}, {s2, t2}, {s1, t2} - }; + float st[4][2]; + + st[0][0] = s1; + st[0][1] = t1; + st[1][0] = s2; + st[1][1] = t1; + st[2][0] = s2; + st[2][1] = t2; + st[3][0] = s1; + st[3][1] = t2; util_map_texcoords2d_onto_cubemap(surf->face, /* pointer, stride in floats */ @@ -560,45 +575,29 @@ void util_blitter_clear(struct blitter_context *blitter, blitter_restore_CSOs(ctx); } -void util_blitter_copy(struct blitter_context *blitter, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, - unsigned width, unsigned height, - boolean ignore_stencil) +static boolean +is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2, + unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2) +{ + if (sx1 >= dx2 || sx2 <= dx1 || sy1 >= dy2 || sy2 <= dy1) { + return FALSE; + } else { + return TRUE; + } +} + +static void util_blitter_do_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height, + boolean is_depth) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb_state; - boolean is_stencil, is_depth; - unsigned dst_tex_usage; - - /* give up if textures are not set */ - assert(dst->texture && src->texture); - if (!dst->texture || !src->texture) - return; - - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0; - dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL : - PIPE_TEXTURE_USAGE_RENDER_TARGET; - /* check if we can sample from and render to the surfaces */ - /* (assuming copying a stencil buffer is not possible) */ - if ((!ignore_stencil && is_stencil) || - !screen->is_format_supported(screen, dst->format, dst->texture->target, - dst_tex_usage, 0) || - !screen->is_format_supported(screen, src->format, src->texture->target, - PIPE_TEXTURE_USAGE_SAMPLER, 0)) { - util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, - width, height); - return; - } - - /* check whether the states are properly saved */ - blitter_check_saved_CSOs(ctx); assert(blitter->saved_fb_state.nr_cbufs != ~0); assert(blitter->saved_num_textures != ~0); assert(blitter->saved_num_sampler_states != ~0); @@ -656,6 +655,108 @@ void util_blitter_copy(struct blitter_context *blitter, blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); blitter_draw_quad(ctx); + +} + +static void util_blitter_overlap_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + + struct pipe_texture texTemp; + struct pipe_texture *texture; + struct pipe_surface *tex_surf; + + /* check whether the states are properly saved */ + blitter_check_saved_CSOs(ctx); + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = dst->texture->format; /* XXX verify supported by driver! */ + texTemp.last_level = 0; + texTemp.width0 = width; + texTemp.height0 = height; + texTemp.depth0 = 1; + + texture = screen->texture_create(screen, &texTemp); + if (!texture) + return; + + tex_surf = screen->get_tex_surface(screen, texture, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + + /* blit from the src to the temp */ + util_blitter_do_copy(blitter, tex_surf, 0, 0, + src, srcx, srcy, + width, height, + FALSE); + util_blitter_do_copy(blitter, dst, dstx, dsty, + tex_surf, 0, 0, + width, height, + FALSE); + pipe_surface_reference(&tex_surf, NULL); + pipe_texture_reference(&texture, NULL); + blitter_restore_CSOs(ctx); +} + +void util_blitter_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height, + boolean ignore_stencil) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + boolean is_stencil, is_depth; + unsigned dst_tex_usage; + + /* give up if textures are not set */ + assert(dst->texture && src->texture); + if (!dst->texture || !src->texture) + return; + + if (dst->texture == src->texture) { + if (is_overlap(srcx, srcx + width, srcy, srcy + height, + dstx, dstx + width, dsty, dsty + height)) { + util_blitter_overlap_copy(blitter, dst, dstx, dsty, src, srcx, srcy, + width, height); + return; + } + } + + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0; + dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL : + PIPE_TEXTURE_USAGE_RENDER_TARGET; + + /* check if we can sample from and render to the surfaces */ + /* (assuming copying a stencil buffer is not possible) */ + if ((!ignore_stencil && is_stencil) || + !screen->is_format_supported(screen, dst->format, dst->texture->target, + dst_tex_usage, 0) || + !screen->is_format_supported(screen, src->format, src->texture->target, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, + width, height); + return; + } + + /* check whether the states are properly saved */ + blitter_check_saved_CSOs(ctx); + util_blitter_do_copy(blitter, + dst, dstx, dsty, + src, srcx, srcy, + width, height, is_depth); blitter_restore_CSOs(ctx); } diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 4e01123fff1..9b4e6ca2a73 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -454,7 +454,8 @@ debug_dump_flags(const struct debug_named_value *names, util_strncat(output, "|", sizeof(output)); else first = 0; - util_strncat(output, names->name, sizeof(output)); + util_strncat(output, names->name, sizeof(output) - 1); + output[sizeof(output) - 1] = '\0'; value &= ~names->value; } ++names; @@ -467,7 +468,8 @@ debug_dump_flags(const struct debug_named_value *names, first = 0; util_snprintf(rest, sizeof(rest), "0x%08lx", value); - util_strncat(output, rest, sizeof(output)); + util_strncat(output, rest, sizeof(output) - 1); + output[sizeof(output) - 1] = '\0'; } if(first) diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index abd834c741a..facc30a5534 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -188,7 +188,7 @@ void _debug_assert_fail(const char *expr, #ifdef DEBUG #define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__)) #else -#define debug_assert(expr) ((void)0) +#define debug_assert(expr) do { } while (0 && (expr)) #endif diff --git a/src/gallium/auxiliary/util/u_debug_dump.c b/src/gallium/auxiliary/util/u_debug_dump.c index 09866880aea..61624d05c0a 100644 --- a/src/gallium/auxiliary/util/u_debug_dump.c +++ b/src/gallium/auxiliary/util/u_debug_dump.c @@ -255,15 +255,13 @@ DEFINE_DEBUG_DUMP_CONTINUOUS(tex_mipfilter) static const char * debug_dump_tex_filter_names[] = { "PIPE_TEX_FILTER_NEAREST", - "PIPE_TEX_FILTER_LINEAR", - "PIPE_TEX_FILTER_ANISO" + "PIPE_TEX_FILTER_LINEAR" }; static const char * debug_dump_tex_filter_short_names[] = { "nearest", - "linear", - "aniso" + "linear" }; DEFINE_DEBUG_DUMP_CONTINUOUS(tex_filter) diff --git a/src/gallium/auxiliary/util/u_debug_memory.c b/src/gallium/auxiliary/util/u_debug_memory.c index 7623cb93981..d6484f4ad51 100644 --- a/src/gallium/auxiliary/util/u_debug_memory.c +++ b/src/gallium/auxiliary/util/u_debug_memory.c @@ -297,9 +297,9 @@ debug_memory_end(unsigned long start_no) if((start_no <= hdr->no && hdr->no < last_no) || (last_no < start_no && (hdr->no < last_no || start_no <= hdr->no))) { - debug_printf("%s:%u:%s: %u bytes at %p not freed\n", + debug_printf("%s:%u:%s: %lu bytes at %p not freed\n", hdr->file, hdr->line, hdr->function, - hdr->size, ptr); + (unsigned long) hdr->size, ptr); #if DEBUG_MEMORY_STACK debug_backtrace_dump(hdr->backtrace, DEBUG_MEMORY_STACK); #endif @@ -315,8 +315,8 @@ debug_memory_end(unsigned long start_no) } if(total_size) { - debug_printf("Total of %u KB of system memory apparently leaked\n", - (total_size + 1023)/1024); + debug_printf("Total of %lu KB of system memory apparently leaked\n", + (unsigned long) (total_size + 1023)/1024); } else { debug_printf("No memory leaks detected.\n"); diff --git a/src/gallium/auxiliary/util/u_dl.h b/src/gallium/auxiliary/util/u_dl.h index 018b38543b0..85296c58af6 100644 --- a/src/gallium/auxiliary/util/u_dl.h +++ b/src/gallium/auxiliary/util/u_dl.h @@ -30,6 +30,18 @@ #define U_DL_H_ +#include "pipe/p_config.h" + + +#if defined(PIPE_OS_WINDOWS) +# define UTIL_DL_EXT ".dll" +#elif defined(PIPE_OS_APPLE) +# define UTIL_DL_EXT ".dylib" +#else +# define UTIL_DL_EXT ".so" +#endif + + struct util_dl_library; diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 866b18ff160..9f16b42944e 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -76,9 +76,9 @@ PIPE_FORMAT_R8G8_SNORM , array , 1, 1, sn8 , sn8 , , , xy01, PIPE_FORMAT_R8G8B8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , , xyz1, rgb PIPE_FORMAT_R8G8B8A8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb PIPE_FORMAT_R8G8B8X8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyz1, rgb -PIPE_FORMAT_B6G5R5_SNORM , arith , 1, 1, sn5 , sn5 , sn6 , , zyx1, rgb -PIPE_FORMAT_A8B8G8R8_SNORM , arith , 1, 1, sn8 , sn8 , sn8 , sn8 , zyxw, rgb -PIPE_FORMAT_X8B8G8R8_SNORM , arith , 1, 1, sn8 , sn8 , sn8 , sn8 , zyx1, rgb +PIPE_FORMAT_B6G5R5_SNORM , arith , 1, 1, sn5 , sn5 , sn6 , , xyz1, rgb +PIPE_FORMAT_A8B8G8R8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzyx, rgb +PIPE_FORMAT_X8B8G8R8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzy1, rgb PIPE_FORMAT_R8_SSCALED , array , 1, 1, s8 , , , , x001, rgb PIPE_FORMAT_R8G8_SSCALED , array , 1, 1, s8 , s8 , , , xy01, rgb PIPE_FORMAT_R8G8B8_SSCALED , array , 1, 1, s8 , s8 , s8 , , xyz1, rgb @@ -90,14 +90,14 @@ PIPE_FORMAT_R32G32B32_FIXED , array , 1, 1, h32 , h32 , h32 , , xyz1, PIPE_FORMAT_R32G32B32A32_FIXED , array , 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb PIPE_FORMAT_L8_SRGB , arith , 1, 1, u8 , , , , xxx1, srgb PIPE_FORMAT_A8L8_SRGB , arith , 1, 1, u8 , u8 , , , xxxy, srgb -PIPE_FORMAT_R8G8B8_SRGB , arith , 1, 1, u8 , u8 , u8 , , xyz1, srgb -PIPE_FORMAT_R8G8B8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , xyzw, srgb -PIPE_FORMAT_R8G8B8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , xyz1, srgb -PIPE_FORMAT_A8R8G8B8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , wxyz, srgb -PIPE_FORMAT_X8R8G8B8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , 1xyz, srgb -PIPE_FORMAT_B8G8R8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyxw, srgb -PIPE_FORMAT_B8G8R8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb -PIPE_FORMAT_X8UB8UG8SR8S_NORM , arith , 1, 1, sn8 , sn8 , un8 , x8 , 1zyx, rgb +PIPE_FORMAT_R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , , xyz1, srgb +PIPE_FORMAT_R8G8B8A8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , xyzw, srgb +PIPE_FORMAT_R8G8B8X8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , xyz1, srgb +PIPE_FORMAT_A8R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , yzwx, srgb +PIPE_FORMAT_X8R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , yzw1, srgb +PIPE_FORMAT_B8G8R8A8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , zyxw, srgb +PIPE_FORMAT_B8G8R8X8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb +PIPE_FORMAT_X8UB8UG8SR8S_NORM , array , 1, 1, sn8 , sn8 , un8 , x8 , wzy1, rgb PIPE_FORMAT_B6UG5SR5S_NORM , arith , 1, 1, sn5 , sn5 , un6 , , xyz1, rgb PIPE_FORMAT_DXT1_RGB , dxt , 4, 4, x64 , , , , xyz1, rgb PIPE_FORMAT_DXT1_RGBA , dxt , 4, 4, x64 , , , , xyzw, rgb diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 090183fb174..a558923b2ed 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -119,7 +119,7 @@ enum util_format_colorspace { UTIL_FORMAT_COLORSPACE_RGB = 0, UTIL_FORMAT_COLORSPACE_SRGB = 1, UTIL_FORMAT_COLORSPACE_YUV = 2, - UTIL_FORMAT_COLORSPACE_ZS = 3, + UTIL_FORMAT_COLORSPACE_ZS = 3 }; diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 81aeb83cbb5..b2969a210a7 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -585,13 +585,12 @@ do { \ static INLINE uint32_t util_unsigned_fixed(float value, unsigned frac_bits) { - value *= (1<<frac_bits); - return value < 0 ? 0 : value; + return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits)); } static INLINE int32_t util_signed_fixed(float value, unsigned frac_bits) { - return value * (1<<frac_bits); + return (int32_t)(value * (1<<frac_bits)); } diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 6269c72e121..87ee0e47685 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include <winsock2.h> # include <windows.h> -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) # include <sys/socket.h> # include <netinet/in.h> # include <unistd.h> @@ -54,7 +54,7 @@ u_socket_close(int s) if (s < 0) return; -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) shutdown(s, SHUT_RDWR); close(s); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) @@ -117,7 +117,7 @@ u_socket_connect(const char *hostname, uint16_t port) if (!host) return -1; - memcpy((char *)&sa.sin_addr,host->h_addr,host->h_length); + memcpy((char *)&sa.sin_addr,host->h_addr_list[0],host->h_length); sa.sin_family= host->h_addrtype; sa.sin_port = htons(port); @@ -169,7 +169,7 @@ u_socket_listen_on_port(uint16_t portnum) void u_socket_block(int s, boolean block) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) int old = fcntl(s, F_GETFL, 0); if (old == -1) return; diff --git a/src/gallium/auxiliary/util/u_network.h b/src/gallium/auxiliary/util/u_network.h index 0aa898b9676..187dcab86e7 100644 --- a/src/gallium/auxiliary/util/u_network.h +++ b/src/gallium/auxiliary/util/u_network.h @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define PIPE_HAVE_SOCKETS -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) # define PIPE_HAVE_SOCKETS #endif diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 74343299623..10a874f3416 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -135,6 +135,39 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim ) } } +static INLINE unsigned +u_vertices_per_prim(int primitive) +{ + switch(primitive) { + case PIPE_PRIM_POINTS: + return 1; + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_LINE_STRIP: + return 2; + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + return 3; + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return 4; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return 6; + + /* following primitives should never be used + * with geometry shaders abd their size is + * undefined */ + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + default: + debug_printf("Unrecognized geometry shader primitive"); + return 3; + } +} + const char *u_prim_name( unsigned pipe_prim ); #endif diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 298fbacecba..8479161c744 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -41,7 +41,7 @@ /** * Copy 2D rect from one place to another. * Position and sizes are in pixels. - * src_pitch may be negative to do vertical flip of pixels from source. + * src_stride may be negative to do vertical flip of pixels from source. */ void util_copy_rect(ubyte * dst, @@ -54,7 +54,7 @@ util_copy_rect(ubyte * dst, const ubyte * src, int src_stride, unsigned src_x, - int src_y) + unsigned src_y) { unsigned i; int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; @@ -65,10 +65,6 @@ util_copy_rect(ubyte * dst, assert(blocksize > 0); assert(blockwidth > 0); assert(blockheight > 0); - assert(src_x >= 0); - assert(src_y >= 0); - assert(dst_x >= 0); - assert(dst_y >= 0); dst_x /= blockwidth; dst_y /= blockheight; @@ -113,8 +109,6 @@ util_fill_rect(ubyte * dst, assert(blocksize > 0); assert(blockwidth > 0); assert(blockheight > 0); - assert(dst_x >= 0); - assert(dst_y >= 0); dst_x /= blockwidth; dst_y /= blockheight; diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h index 5e444ffae21..b44d821904b 100644 --- a/src/gallium/auxiliary/util/u_rect.h +++ b/src/gallium/auxiliary/util/u_rect.h @@ -45,7 +45,7 @@ extern void util_copy_rect(ubyte * dst, enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, const ubyte * src, - int src_stride, unsigned src_x, int src_y); + int src_stride, unsigned src_x, unsigned src_y); extern void util_fill_rect(ubyte * dst, enum pipe_format format, diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 8172ead0201..b751e29ab62 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -44,13 +44,15 @@ /** * Make simple vertex pass-through shader. + * \param num_attribs number of attributes to pass through + * \param semantic_names array of semantic names for each attribute + * \param semantic_indexes array of semantic indexes for each attribute */ void * util_make_vertex_passthrough_shader(struct pipe_context *pipe, uint num_attribs, const uint *semantic_names, const uint *semantic_indexes) - { struct ureg_program *ureg; uint i; @@ -78,8 +80,6 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, } - - /** * Make simple fragment texture shader: * IMM {0,0,0,1} // (if writemask != 0xf) @@ -125,6 +125,12 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, return ureg_create_shader_and_destroy( ureg, pipe ); } + +/** + * Make a simple fragment shader that sets the output color to a color + * taken from a texture. + * \param tex_target one of PIPE_TEXTURE_x + */ void * util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target ) { @@ -133,6 +139,7 @@ util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target ) TGSI_WRITEMASK_XYZW ); } + /** * Make a simple fragment texture shader which reads an X component from * a texture and writes it as depth. @@ -177,6 +184,7 @@ util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe, return ureg_create_shader_and_destroy( ureg, pipe ); } + /** * Make simple fragment color pass-through shader. */ @@ -186,15 +194,19 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe) return util_make_fragment_clonecolor_shader(pipe, 1); } + +/** + * Make a fragment shader that copies the input color to N output colors. + */ void * util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs) { struct ureg_program *ureg; struct ureg_src src; - struct ureg_dst dst[8]; + struct ureg_dst dst[PIPE_MAX_COLOR_BUFS]; int i; - assert(num_cbufs <= 8); + assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c index 5cd05b29047..4d976d6dca4 100644 --- a/src/gallium/auxiliary/util/u_stream_stdc.c +++ b/src/gallium/auxiliary/util/u_stream_stdc.c @@ -32,7 +32,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) #include <stdio.h> diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 5b8dd1abb94..1ba82bb21f0 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -1155,27 +1155,6 @@ ycbcr_get_tile_rgba(const ushort *src, } -static void -fake_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = - pRow[3] = (i ^ j) & 1 ? 1.0f : 0.0f; - } - p += dst_stride; - } -} - - void pipe_tile_raw_to_rgba(enum pipe_format format, void *src, @@ -1258,8 +1237,10 @@ pipe_tile_raw_to_rgba(enum pipe_format format, ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); - fake_get_tile_rgba(src, w, h, dst, dst_stride); + util_format_read_4f(format, + dst, dst_stride * sizeof(float), + src, util_format_get_stride(format, w), + 0, 0, w, h); } } diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile deleted file mode 100644 index 4314c1e8d69..00000000000 --- a/src/gallium/auxiliary/vl/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = vl - -C_SOURCES = \ - vl_bitstream_parser.c \ - vl_mpeg12_mc_renderer.c \ - vl_compositor.c \ - vl_csc.c \ - vl_shader_build.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript deleted file mode 100644 index aed69f5efed..00000000000 --- a/src/gallium/auxiliary/vl/SConscript +++ /dev/null @@ -1,13 +0,0 @@ -Import('*') - -vl = env.ConvenienceLibrary( - target = 'vl', - source = [ - 'vl_bitstream_parser.c', - 'vl_mpeg12_mc_renderer.c', - 'vl_compositor.c', - 'vl_csc.c', - 'vl_shader_build.c', - ]) - -auxiliaries.insert(0, vl) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 2299ed20713..5a5d391e94e 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -334,11 +334,13 @@ create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) free(tokens); } +#if 0 static void create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) { assert(false); } +#endif static void create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) @@ -442,11 +444,13 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) free(tokens); } +#if 0 static void create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) { assert(false); } +#endif static void create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) @@ -532,11 +536,13 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) free(tokens); } +#if 0 static void create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) { assert(false); } +#endif static void create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) @@ -658,11 +664,13 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) free(tokens); } +#if 0 static void create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) { assert(false); } +#endif static void xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) @@ -1081,6 +1089,9 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, assert(ycbcr_vb); assert(pos < r->macroblocks_per_batch); + mo_vec[1].x = 0; + mo_vec[1].y = 0; + switch (mb->mb_type) { case PIPE_MPEG12_MACROBLOCK_TYPE_BI: { diff --git a/src/gallium/docs/Makefile b/src/gallium/docs/Makefile new file mode 100644 index 00000000000..d4a5be41922 --- /dev/null +++ b/src/gallium/docs/Makefile @@ -0,0 +1,89 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Gallium.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Gallium.qhc" + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/src/gallium/docs/make.bat b/src/gallium/docs/make.bat new file mode 100644 index 00000000000..6f97e0730a7 --- /dev/null +++ b/src/gallium/docs/make.bat @@ -0,0 +1,113 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +set SPHINXBUILD=sphinx-build +set BUILDDIR=build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Gallium.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Gallium.ghc + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +:end diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py new file mode 100644 index 00000000000..9b0c86babdb --- /dev/null +++ b/src/gallium/docs/source/conf.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# +# Gallium documentation build configuration file, created by +# sphinx-quickstart on Sun Dec 20 14:09:05 2009. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.append(os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.pngmath'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Gallium' +copyright = u'2009, VMWare, X.org, Nouveau' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.3' +# The full version, including alpha/beta/rc tags. +release = '0.3' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# The language for highlighting source code. +highlight_language = 'c' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. Major themes that come with +# Sphinx are currently 'default' and 'sphinxdoc'. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_use_modindex = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Galliumdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'Gallium.tex', u'Gallium Documentation', + u'VMWare, X.org, Nouveau', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_use_modindex = True diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst new file mode 100644 index 00000000000..4767cfb0d87 --- /dev/null +++ b/src/gallium/docs/source/context.rst @@ -0,0 +1,182 @@ +Context +======= + +The context object represents the purest, most directly accessible, abilities +of the device's 3D rendering pipeline. + +Methods +------- + +CSO State +^^^^^^^^^ + +All CSO state is created, bound, and destroyed, with triplets of methods that +all follow a specific naming scheme. For example, ``create_blend_state``, +``bind_blend_state``, and ``destroy_blend_state``. + +CSO objects handled by the context object: + +* :ref:`Blend`: ``*_blend_state`` +* :ref:`Sampler`: These are special; they can be bound to either vertex or + fragment samplers, and they are bound in groups. + ``bind_fragment_sampler_states``, ``bind_vertex_sampler_states`` +* :ref:`Rasterizer`: ``*_rasterizer_state`` +* :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state`` +* :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for + fragment shaders, and ``*_vs_state`` is for vertex shaders. + + +Resource Binding State +^^^^^^^^^^^^^^^^^^^^^^ + +This state describes how resources in various flavours (textures, +buffers, surfaces) are bound to the driver. + + +* ``set_constant_buffer`` +* ``set_framebuffer_state`` +* ``set_fragment_sampler_textures`` +* ``set_vertex_sampler_textures`` +* ``set_vertex_buffers`` + + +Non-CSO State +^^^^^^^^^^^^^ + +These pieces of state are too small, variable, and/or trivial to have CSO +objects. They all follow simple, one-method binding calls, e.g. +``set_edgeflags``. + +* ``set_blend_color`` +* ``set_clip_state`` +* ``set_polygon_stipple`` +* ``set_scissor_state`` +* ``set_viewport_state`` +* ``set_vertex_elements`` + + +Clearing +^^^^^^^^ + +``clear`` initializes some or all of the surfaces currently bound to +the framebuffer to particular RGBA, depth, or stencil values. + +Clear is one of the most difficult concepts to nail down to a single +interface and it seems likely that we will want to add additional +clear paths, for instance clearing surfaces not bound to the +framebuffer, or read-modify-write clears such as depth-only or +stencil-only clears of packed depth-stencil buffers. + + +Drawing +^^^^^^^ + +``draw_arrays`` draws a specified primitive. + +This command is equivalent to calling ``draw_arrays_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. + +``draw_elements`` draws a specified primitive using an optional +index buffer. + +This command is equivalent to calling ``draw_elements_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. + +``draw_range_elements`` + +XXX: this is (probably) a temporary entrypoint, as the range +information should be available from the vertex_buffer state. +Using this to quickly evaluate a specialized path in the draw +module. + +``draw_arrays_instanced`` draws multiple instances of the same primitive. + +This command is equivalent to calling ``draw_elements_instanced`` +with ``indexBuffer`` set to NULL and ``indexSize`` set to 0. + +``draw_elements_instanced`` draws multiple instances of the same primitive +using an optional index buffer. + +For instanceID in the range between ``startInstance`` +and ``startInstance``+``instanceCount``-1, inclusive, draw a primitive +specified by ``mode`` and sequential numbers in the range between ``start`` +and ``start``+``count``-1, inclusive. + +If ``indexBuffer`` is not NULL, it specifies an index buffer with index +byte size of ``indexSize``. The sequential numbers are used to lookup +the index buffer and the resulting indices in turn are used to fetch +vertex attributes. + +If ``indexBuffer`` is NULL, the sequential numbers are used directly +as indices to fetch vertex attributes. + +If a given vertex element has ``instance_divisor`` set to 0, it is said +it contains per-vertex data and effective vertex attribute address needs +to be recalculated for every index. + + attribAddr = ``stride`` * index + ``src_offset`` + +If a given vertex element has ``instance_divisor`` set to non-zero, +it is said it contains per-instance data and effective vertex attribute +address needs to recalculated for every ``instance_divisor``-th instance. + + attribAddr = ``stride`` * instanceID / ``instance_divisor`` + ``src_offset`` + +In the above formulas, ``src_offset`` is taken from the given vertex element +and ``stride`` is taken from a vertex buffer associated with the given +vertex element. + +The calculated attribAddr is used as an offset into the vertex buffer to +fetch the attribute data. + +The value of ``instanceID`` can be read in a vertex shader through a system +value register declared with INSTANCEID semantic name. + + +Queries +^^^^^^^ + +Queries gather some statistic from the 3D pipeline over one or more +draws. Queries may be nested, though no state tracker currently +exercises this. + +Queries can be created with ``create_query`` and deleted with +``destroy_query``. To enable a query, use ``begin_query``, and when finished, +use ``end_query`` to stop the query. Finally, ``get_query_result`` is used +to retrieve the results. + +Flushing +^^^^^^^^ + +``flush`` + + +Resource Busy Queries +^^^^^^^^^^^^^^^^^^^^^ + +``is_texture_referenced`` + +``is_buffer_referenced`` + + + +Blitting +^^^^^^^^ + +These methods emulate classic blitter controls. They are not guaranteed to be +available; if they are set to NULL, then they are not present. + +These methods operate directly on ``pipe_surface`` objects, and stand +apart from any 3D state in the context. Blitting functionality may be +moved to a separate abstraction at some point in the future. + +``surface_fill`` performs a fill operation on a section of a surface. + +``surface_copy`` blits a region of a surface to a region of another surface, +provided that both surfaces are the same format. The source and destination +may be the same surface, and overlapping blits are permitted. + +The interfaces to these calls are likely to change to make it easier +for a driver to batch multiple blits with the same source and +destination. + diff --git a/src/gallium/docs/source/cso.rst b/src/gallium/docs/source/cso.rst new file mode 100644 index 00000000000..dab1ee50f39 --- /dev/null +++ b/src/gallium/docs/source/cso.rst @@ -0,0 +1,14 @@ +CSO +=== + +CSO, Constant State Objects, are a core part of Gallium's API. + +CSO work on the principle of reusable state; they are created by filling +out a state object with the desired properties, then passing that object +to a context. The context returns an opaque context-specific handle which +can be bound at any time for the desired effect. + +.. toctree:: + :glob: + + cso/* diff --git a/src/gallium/docs/source/cso/blend.rst b/src/gallium/docs/source/cso/blend.rst new file mode 100644 index 00000000000..fd9e4a1e2d5 --- /dev/null +++ b/src/gallium/docs/source/cso/blend.rst @@ -0,0 +1,14 @@ +.. _blend: + +Blend +===== + +This state controls blending of the final fragments into the target rendering +buffers. + +XXX it is unresolved what behavior should result if blend_enable is off. + +Members +------- + +XXX undocumented members diff --git a/src/gallium/docs/source/cso/dsa.rst b/src/gallium/docs/source/cso/dsa.rst new file mode 100644 index 00000000000..12abaa9d6fe --- /dev/null +++ b/src/gallium/docs/source/cso/dsa.rst @@ -0,0 +1,58 @@ +.. _depth,stencil,&alpha: + +Depth, Stencil, & Alpha +======================= + +These three states control the depth, stencil, and alpha tests, used to +discard fragments that have passed through the fragment shader. + +Traditionally, these three tests have been clumped together in hardware, so +they are all stored in one structure. + +During actual execution, the order of operations done on fragments is always: + +* Stencil +* Depth +* Alpha + +Depth Members +------------- + +enabled + Whether the depth test is enabled. +writemask + Whether the depth buffer receives depth writes. +func + The depth test function. One of PIPE_FUNC. + +Stencil Members +--------------- + +XXX document valuemask, writemask + +enabled + Whether the stencil test is enabled. For the second stencil, whether the + two-sided stencil is enabled. +func + The stencil test function. One of PIPE_FUNC. +ref_value + Stencil test reference value; used for certain functions. +fail_op + The operation to carry out if the stencil test fails. One of + PIPE_STENCIL_OP. +zfail_op + The operation to carry out if the stencil test passes but the depth test + fails. One of PIPE_STENCIL_OP. +zpass_op + The operation to carry out if the stencil test and depth test both pass. + One of PIPE_STENCIL_OP. + +Alpha Members +------------- + +enabled + Whether the alpha test is enabled. +func + The alpha test function. One of PIPE_FUNC. +ref_value + Alpha test reference value; used for certain functions. diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst new file mode 100644 index 00000000000..4d8e1708e7c --- /dev/null +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -0,0 +1,152 @@ +.. _rasterizer: + +Rasterizer +========== + +The rasterizer state controls the rendering of points, lines and triangles. +Attributes include polygon culling state, line width, line stipple, +multisample state, scissoring and flat/smooth shading. + + +Members +------- + +flatshade + If set, the provoking vertex of each polygon is used to determine the + color of the entire polygon. If not set, fragment colors will be + interpolated between the vertex colors. + Note that this is separate from the fragment shader input attributes + CONSTANT, LINEAR and PERSPECTIVE. We need the flatshade state at + clipping time to determine how to set the color of new vertices. + Also note that the draw module can implement flat shading by copying + the provoking vertex color to all the other vertices in the primitive. + +flatshade_first + Whether the first vertex should be the provoking vertex, for most + primitives. If not set, the last vertex is the provoking vertex. + +light_twoside + If set, there are per-vertex back-facing colors. The draw module + uses this state along with the front/back information to set the + final vertex colors prior to rasterization. + +front_winding + Indicates the window order of front-facing polygons, either + PIPE_WINDING_CW or PIPE_WINDING_CCW +cull_mode + Indicates which polygons to cull, either PIPE_WINDING_NONE (cull no + polygons), PIPE_WINDING_CW (cull clockwise-winding polygons), + PIPE_WINDING_CCW (cull counter clockwise-winding polygons), or + PIPE_WINDING_BOTH (cull all polygons). + +fill_cw + Indicates how to fill clockwise polygons, either PIPE_POLYGON_MODE_FILL, + PIPE_POLYGON_MODE_LINE or PIPE_POLYGON_MODE_POINT. +fill_ccw + Indicates how to fill counter clockwise polygons, either + PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE or PIPE_POLYGON_MODE_POINT. + +poly_stipple_enable + Whether polygon stippling is enabled. +poly_smooth + Controls OpenGL-style polygon smoothing/antialiasing +offset_cw + If set, clockwise polygons will have polygon offset factors applied +offset_ccw + If set, counter clockwise polygons will have polygon offset factors applied +offset_units + Specifies the polygon offset bias +offset_scale + Specifies the polygon offset scale + +line_width + The width of lines. +line_smooth + Whether lines should be smoothed. Line smoothing is simply anti-aliasing. +line_stipple_enable + Whether line stippling is enabled. +line_stipple_pattern + 16-bit bitfield of on/off flags, used to pattern the line stipple. +line_stipple_factor + When drawinga stippled line, each bit in the stipple pattern is + repeated N times, where N = line_stipple_factor + 1. +line_last_pixel + Controls whether the last pixel in a line is drawn or not. OpenGL + omits the last pixel to avoid double-drawing pixels at the ends of lines + when drawing connected lines. + +point_smooth + Whether points should be smoothed. Point smoothing turns rectangular + points into circles or ovals. +point_size_per_vertex + Whether vertices have a point size element. +point_size + The size of points, if not specified per-vertex. +point_size_min + The minimum size of points. +point_size_max + The maximum size of points. +point_sprite + Whether points are drawn as sprites (textured quads) +sprite_coord_mode + Specifies how the value for each shader output should be computed when + drawing sprites. If PIPE_SPRITE_COORD_NONE, don't change the vertex + shader output. Otherwise, the four vertices of the resulting quad will + be assigned texture coordinates. For PIPE_SPRITE_COORD_LOWER_LEFT, the + lower left vertex will have coordinate (0,0,0,1). + For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left vertex will have + coordinate (0,0,0,1). + This state is needed by the 'draw' module because that's where each + point vertex is converted into four quad vertices. There's no other + place to emit the new vertex texture coordinates which are required for + sprite rendering. + Note that when geometry shaders are available, this state could be + removed. A special geometry shader defined by the state tracker could + converts the incoming points into quads with the proper texture coords. + +scissor + Whether the scissor test is enabled. + +multisample + Whether :ref:`MSAA` is enabled. + +bypass_vs_clip_and_viewport + Whether the entire TCL pipeline should be bypassed. This implies that + vertices are pre-transformed for the viewport, and will not be run + through the vertex shader. Note that implementations may still clip away + vertices that are not in the viewport. + +gl_rasterization_rules + Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set, + the rasterizer will use (0, 0) for pixel centers. + + +Notes +----- + +flatshade +^^^^^^^^^ + +The actual interpolated shading algorithm is obviously +implementation-dependent, but will usually be Gourard for most hardware. + +bypass_vs_clip_and_viewport +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When set, this implies that vertices are pre-transformed for the viewport, and +will not be run through the vertex shader. Note that implementations may still +clip away vertices that are not visible. + +flatshade_first +^^^^^^^^^^^^^^^ + +There are several important exceptions to the specification of this rule. + +* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first + vertex. If the caller wishes to change the provoking vertex, they merely + need to rotate the vertices themselves. +* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no + effect; the provoking vertex is always the last vertex. +* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the + second vertex, not the first. This permits each segment of the fan to have + a different color. diff --git a/src/gallium/docs/source/cso/sampler.rst b/src/gallium/docs/source/cso/sampler.rst new file mode 100644 index 00000000000..e3f1757f57a --- /dev/null +++ b/src/gallium/docs/source/cso/sampler.rst @@ -0,0 +1,46 @@ +.. _sampler: + +Sampler +======= + +Texture units have many options for selecting texels from loaded textures; +this state controls an individual texture unit's texel-sampling settings. + +Texture coordinates are always treated as four-dimensional, and referred to +with the traditional (S, T, R, Q) notation. + +Members +------- + +XXX undocumented compare_mode, compare_func + +wrap_s + How to wrap the S coordinate. One of PIPE_TEX_WRAP. +wrap_t + How to wrap the T coordinate. One of PIPE_TEX_WRAP. +wrap_r + How to wrap the R coordinate. One of PIPE_TEX_WRAP. +min_img_filter + The filter to use when minifying texels. One of PIPE_TEX_FILTER. +min_mip_filter + The filter to use when minifying mipmapped textures. One of + PIPE_TEX_FILTER. +mag_img_filter + The filter to use when magnifying texels. One of PIPE_TEX_FILTER. +normalized_coords + Whether the texture coordinates are normalized. If normalized, they will + always be in [0, 1]. If not, they will be in the range of each dimension + of the loaded texture. +prefilter + XXX From the Doxy, "weird sampling state exposed by some APIs." Refine. +lod_bias + The bias to apply to the level of detail. +min_lod + Minimum level of detail, used to clamp LoD after bias. +max_lod + Maximum level of detail, used to clamp LoD after bias. +border_color + RGBA color used for out-of-bounds coordinates. +max_anisotropy + Maximum filtering to apply anisotropically to textures. Setting this to + 1.0 effectively disables anisotropic filtering. diff --git a/src/gallium/docs/source/cso/shader.rst b/src/gallium/docs/source/cso/shader.rst new file mode 100644 index 00000000000..0ee42c87876 --- /dev/null +++ b/src/gallium/docs/source/cso/shader.rst @@ -0,0 +1,12 @@ +.. _shader: + +Shader +====== + +One of the two types of shaders supported by Gallium. + +Members +------- + +tokens + A list of tgsi_tokens. diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst new file mode 100644 index 00000000000..a448203c6fa --- /dev/null +++ b/src/gallium/docs/source/distro.rst @@ -0,0 +1,143 @@ +Distribution +============ + +Along with the interface definitions, the following drivers, state trackers, +and auxiliary modules are shipped in the standard Gallium distribution. + +Drivers +------- + +Cell +^^^^ + +Failover +^^^^^^^^ + +Deprecated. + +Intel i915 +^^^^^^^^^^ + +Intel i965 +^^^^^^^^^^ + +Highly experimental. + +Identity +^^^^^^^^ + +Wrapper driver. + +LLVM Softpipe +^^^^^^^^^^^^^ + +nVidia nv04 +^^^^^^^^^^^ + +Deprecated. + +nVidia nv10 +^^^^^^^^^^^ + +Deprecated. + +nVidia nv20 +^^^^^^^^^^^ + +Deprecated. + +nVidia nv30 +^^^^^^^^^^^ + +nVidia nv40 +^^^^^^^^^^^ + +nVidia nv50 +^^^^^^^^^^^ + +VMWare SVGA +^^^^^^^^^^^ + +ATI r300 +^^^^^^^^ + +AMD/ATI r600 +^^^^^^^^^^^^ + +Highly experimental. + +Softpipe +^^^^^^^^ + +Reference software rasterizer. + +Trace +^^^^^ + +Wrapper driver. + +State Trackers +-------------- + +Direct Rendering Infrastructure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +EGL +^^^ + +GLX +^^^ + +MesaGL +^^^^^^ + +Python +^^^^^^ + +OpenVG +^^^^^^ + +WGL +^^^ + +Xorg XFree86 DDX +^^^^^^^^^^^^^^^^ + +Auxiliary +--------- + +CSO Cache +^^^^^^^^^ + +Draw +^^^^ + +Gallivm +^^^^^^^ + +Indices +^^^^^^^ + +Tool for translating or generating element indices for element-based +rendering. + +Pipe Buffer Manager +^^^^^^^^^^^^^^^^^^^ + +Remote Debugger +^^^^^^^^^^^^^^^ + +Runtime Assembly Emission +^^^^^^^^^^^^^^^^^^^^^^^^^ + +TGSI +^^^^ + +Basic utilities for manipulating TGSI streams. + +Translate +^^^^^^^^^ + +Util +^^^^ + diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst new file mode 100644 index 00000000000..6a9110ce786 --- /dev/null +++ b/src/gallium/docs/source/glossary.rst @@ -0,0 +1,10 @@ +Glossary +======== + +.. glossary:: + :sorted: + + MSAA + Multi-Sampled Anti-Aliasing. A basic anti-aliasing technique that takes + multiple samples of the depth buffer, and uses this information to + smooth the edges of polygons. diff --git a/src/gallium/docs/source/index.rst b/src/gallium/docs/source/index.rst new file mode 100644 index 00000000000..54bc883fced --- /dev/null +++ b/src/gallium/docs/source/index.rst @@ -0,0 +1,28 @@ +.. Gallium documentation master file, created by + sphinx-quickstart on Sun Dec 20 14:09:05 2009. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Gallium's documentation! +=================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + intro + tgsi + screen + context + cso + distro + glossary + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/src/gallium/docs/source/intro.rst b/src/gallium/docs/source/intro.rst new file mode 100644 index 00000000000..1ea103840a2 --- /dev/null +++ b/src/gallium/docs/source/intro.rst @@ -0,0 +1,9 @@ +Introduction +============ + +What is Gallium? +---------------- + +Gallium is essentially an API for writing graphics drivers in a largely +device-agnostic fashion. It provides several objects which encapsulate the +core services of graphics hardware in a straightforward manner. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst new file mode 100644 index 00000000000..9631e6967ef --- /dev/null +++ b/src/gallium/docs/source/screen.rst @@ -0,0 +1,39 @@ +Screen +====== + +A screen is an object representing the context-independent part of a device. + +Methods +------- + +XXX moar; got bored + +get_name +^^^^^^^^ + +Returns an identifying name for the screen. + +get_vendor +^^^^^^^^^^ + +Returns the screen vendor. + +get_param +^^^^^^^^^ + +Get an integer/boolean screen parameter. + +get_paramf +^^^^^^^^^^ + +Get a floating-point screen parameter. + +is_format_supported +^^^^^^^^^^^^^^^^^^^ + +See if a format can be used in a specific manner. + +texture_create +^^^^^^^^^^^^^^ + +Given a template of texture setup, create a BO-backed texture. diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst new file mode 100644 index 00000000000..ebee4902b05 --- /dev/null +++ b/src/gallium/docs/source/tgsi.rst @@ -0,0 +1,1270 @@ +TGSI +==== + +TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language +for describing shaders. Since Gallium is inherently shaderful, shaders are +an important part of the API. TGSI is the only intermediate representation +used by all drivers. + +Instruction Set +--------------- + +From GL_NV_vertex_program +^^^^^^^^^^^^^^^^^^^^^^^^^ + + +ARL - Address Register Load + +.. math:: + + dst.x = \lfloor src.x\rfloor + + dst.y = \lfloor src.y\rfloor + + dst.z = \lfloor src.z\rfloor + + dst.w = \lfloor src.w\rfloor + + +MOV - Move + +.. math:: + + dst.x = src.x + + dst.y = src.y + + dst.z = src.z + + dst.w = src.w + + +LIT - Light Coefficients + +.. math:: + + dst.x = 1 + + dst.y = max(src.x, 0) + + dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 + + dst.w = 1 + + +RCP - Reciprocal + +.. math:: + + dst.x = \frac{1}{src.x} + + dst.y = \frac{1}{src.x} + + dst.z = \frac{1}{src.x} + + dst.w = \frac{1}{src.x} + + +RSQ - Reciprocal Square Root + +.. math:: + + dst.x = \frac{1}{\sqrt{|src.x|}} + + dst.y = \frac{1}{\sqrt{|src.x|}} + + dst.z = \frac{1}{\sqrt{|src.x|}} + + dst.w = \frac{1}{\sqrt{|src.x|}} + + +EXP - Approximate Exponential Base 2 + +.. math:: + + dst.x = 2^{\lfloor src.x\rfloor} + + dst.y = src.x - \lfloor src.x\rfloor + + dst.z = 2^{src.x} + + dst.w = 1 + + +LOG - Approximate Logarithm Base 2 + +.. math:: + + dst.x = \lfloor\log_2{|src.x|}\rfloor + + dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} + + dst.z = \log_2{|src.x|} + + dst.w = 1 + + +MUL - Multiply + +.. math:: + + dst.x = src0.x \times src1.x + + dst.y = src0.y \times src1.y + + dst.z = src0.z \times src1.z + + dst.w = src0.w \times src1.w + + +ADD - Add + +.. math:: + + dst.x = src0.x + src1.x + + dst.y = src0.y + src1.y + + dst.z = src0.z + src1.z + + dst.w = src0.w + src1.w + + +DP3 - 3-component Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + +DP4 - 4-component Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + +DST - Distance Vector + +.. math:: + + dst.x = 1 + + dst.y = src0.y \times src1.y + + dst.z = src0.z + + dst.w = src1.w + + +MIN - Minimum + +.. math:: + + dst.x = min(src0.x, src1.x) + + dst.y = min(src0.y, src1.y) + + dst.z = min(src0.z, src1.z) + + dst.w = min(src0.w, src1.w) + + +MAX - Maximum + +.. math:: + + dst.x = max(src0.x, src1.x) + + dst.y = max(src0.y, src1.y) + + dst.z = max(src0.z, src1.z) + + dst.w = max(src0.w, src1.w) + + +SLT - Set On Less Than + +.. math:: + + dst.x = (src0.x < src1.x) ? 1 : 0 + + dst.y = (src0.y < src1.y) ? 1 : 0 + + dst.z = (src0.z < src1.z) ? 1 : 0 + + dst.w = (src0.w < src1.w) ? 1 : 0 + + +SGE - Set On Greater Equal Than + +.. math:: + + dst.x = (src0.x >= src1.x) ? 1 : 0 + + dst.y = (src0.y >= src1.y) ? 1 : 0 + + dst.z = (src0.z >= src1.z) ? 1 : 0 + + dst.w = (src0.w >= src1.w) ? 1 : 0 + + +MAD - Multiply And Add + +.. math:: + + dst.x = src0.x \times src1.x + src2.x + + dst.y = src0.y \times src1.y + src2.y + + dst.z = src0.z \times src1.z + src2.z + + dst.w = src0.w \times src1.w + src2.w + + +SUB - Subtract + +.. math:: + + dst.x = src0.x - src1.x + + dst.y = src0.y - src1.y + + dst.z = src0.z - src1.z + + dst.w = src0.w - src1.w + + +LRP - Linear Interpolate + +.. math:: + + dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x + + dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y + + dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z + + dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w + + +CND - Condition + +.. math:: + + dst.x = (src2.x > 0.5) ? src0.x : src1.x + + dst.y = (src2.y > 0.5) ? src0.y : src1.y + + dst.z = (src2.z > 0.5) ? src0.z : src1.z + + dst.w = (src2.w > 0.5) ? src0.w : src1.w + + +DP2A - 2-component Dot Product And Add + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x + + +FRAC - Fraction + +.. math:: + + dst.x = src.x - \lfloor src.x\rfloor + + dst.y = src.y - \lfloor src.y\rfloor + + dst.z = src.z - \lfloor src.z\rfloor + + dst.w = src.w - \lfloor src.w\rfloor + + +CLAMP - Clamp + +.. math:: + + dst.x = clamp(src0.x, src1.x, src2.x) + + dst.y = clamp(src0.y, src1.y, src2.y) + + dst.z = clamp(src0.z, src1.z, src2.z) + + dst.w = clamp(src0.w, src1.w, src2.w) + + +FLR - Floor + +This is identical to ARL. + +.. math:: + + dst.x = \lfloor src.x\rfloor + + dst.y = \lfloor src.y\rfloor + + dst.z = \lfloor src.z\rfloor + + dst.w = \lfloor src.w\rfloor + + +ROUND - Round + +.. math:: + + dst.x = round(src.x) + + dst.y = round(src.y) + + dst.z = round(src.z) + + dst.w = round(src.w) + + +EX2 - Exponential Base 2 + +.. math:: + + dst.x = 2^{src.x} + + dst.y = 2^{src.x} + + dst.z = 2^{src.x} + + dst.w = 2^{src.x} + + +LG2 - Logarithm Base 2 + +.. math:: + + dst.x = \log_2{src.x} + + dst.y = \log_2{src.x} + + dst.z = \log_2{src.x} + + dst.w = \log_2{src.x} + + +POW - Power + +.. math:: + + dst.x = src0.x^{src1.x} + + dst.y = src0.x^{src1.x} + + dst.z = src0.x^{src1.x} + + dst.w = src0.x^{src1.x} + +XPD - Cross Product + +.. math:: + + dst.x = src0.y \times src1.z - src1.y \times src0.z + + dst.y = src0.z \times src1.x - src1.z \times src0.x + + dst.z = src0.x \times src1.y - src1.x \times src0.y + + dst.w = 1 + + +ABS - Absolute + +.. math:: + + dst.x = |src.x| + + dst.y = |src.y| + + dst.z = |src.z| + + dst.w = |src.w| + + +RCC - Reciprocal Clamped + +XXX cleanup on aisle three + +.. math:: + + dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + dst.y = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + +DPH - Homogeneous Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + +COS - Cosine + +.. math:: + + dst.x = \cos{src.x} + + dst.y = \cos{src.x} + + dst.z = \cos{src.x} + + dst.w = \cos{src.x} + + +DDX - Derivative Relative To X + +.. math:: + + dst.x = partialx(src.x) + + dst.y = partialx(src.y) + + dst.z = partialx(src.z) + + dst.w = partialx(src.w) + + +DDY - Derivative Relative To Y + +.. math:: + + dst.x = partialy(src.x) + + dst.y = partialy(src.y) + + dst.z = partialy(src.z) + + dst.w = partialy(src.w) + + +KILP - Predicated Discard + + discard + + +PK2H - Pack Two 16-bit Floats + + TBD + + +PK2US - Pack Two Unsigned 16-bit Scalars + + TBD + + +PK4B - Pack Four Signed 8-bit Scalars + + TBD + + +PK4UB - Pack Four Unsigned 8-bit Scalars + + TBD + + +RFL - Reflection Vector + +.. math:: + + dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x + + dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y + + dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z + + dst.w = 1 + +Considered for removal. + + +SEQ - Set On Equal + +.. math:: + + dst.x = (src0.x == src1.x) ? 1 : 0 + dst.y = (src0.y == src1.y) ? 1 : 0 + dst.z = (src0.z == src1.z) ? 1 : 0 + dst.w = (src0.w == src1.w) ? 1 : 0 + + +SFL - Set On False + +.. math:: + + dst.x = 0 + dst.y = 0 + dst.z = 0 + dst.w = 0 + +Considered for removal. + +SGT - Set On Greater Than + +.. math:: + + dst.x = (src0.x > src1.x) ? 1 : 0 + dst.y = (src0.y > src1.y) ? 1 : 0 + dst.z = (src0.z > src1.z) ? 1 : 0 + dst.w = (src0.w > src1.w) ? 1 : 0 + + +SIN - Sine + +.. math:: + + dst.x = \sin{src.x} + + dst.y = \sin{src.x} + + dst.z = \sin{src.x} + + dst.w = \sin{src.x} + + +SLE - Set On Less Equal Than + +.. math:: + + dst.x = (src0.x <= src1.x) ? 1 : 0 + dst.y = (src0.y <= src1.y) ? 1 : 0 + dst.z = (src0.z <= src1.z) ? 1 : 0 + dst.w = (src0.w <= src1.w) ? 1 : 0 + + +SNE - Set On Not Equal + +.. math:: + + dst.x = (src0.x != src1.x) ? 1 : 0 + dst.y = (src0.y != src1.y) ? 1 : 0 + dst.z = (src0.z != src1.z) ? 1 : 0 + dst.w = (src0.w != src1.w) ? 1 : 0 + + +STR - Set On True + +.. math:: + + dst.x = 1 + dst.y = 1 + dst.z = 1 + dst.w = 1 + + +TEX - Texture Lookup + + TBD + + +TXD - Texture Lookup with Derivatives + + TBD + + +TXP - Projective Texture Lookup + + TBD + + +UP2H - Unpack Two 16-Bit Floats + + TBD + + Considered for removal. + +UP2US - Unpack Two Unsigned 16-Bit Scalars + + TBD + + Considered for removal. + +UP4B - Unpack Four Signed 8-Bit Values + + TBD + + Considered for removal. + +UP4UB - Unpack Four Unsigned 8-Bit Scalars + + TBD + + Considered for removal. + +X2D - 2D Coordinate Transformation + +.. math:: + + dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w + dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w + +Considered for removal. + + +From GL_NV_vertex_program2 +^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +ARA - Address Register Add + + TBD + + Considered for removal. + +ARR - Address Register Load With Round + +.. math:: + + dst.x = round(src.x) + + dst.y = round(src.y) + + dst.z = round(src.z) + + dst.w = round(src.w) + + +BRA - Branch + + pc = target + + Considered for removal. + +CAL - Subroutine Call + + push(pc) + pc = target + + +RET - Subroutine Call Return + + pc = pop() + + Potential restrictions: + * Only occurs at end of function. + +SSG - Set Sign + +.. math:: + + dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 + + dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 + + dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 + + dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 + + +CMP - Compare + +.. math:: + + dst.x = (src0.x < 0) ? src1.x : src2.x + + dst.y = (src0.y < 0) ? src1.y : src2.y + + dst.z = (src0.z < 0) ? src1.z : src2.z + + dst.w = (src0.w < 0) ? src1.w : src2.w + + +KIL - Conditional Discard + +.. math:: + + if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0) + discard + endif + + +SCS - Sine Cosine + +.. math:: + + dst.x = \cos{src.x} + + dst.y = \sin{src.x} + + dst.z = 0 + + dst.y = 1 + + +TXB - Texture Lookup With Bias + + TBD + + +NRM - 3-component Vector Normalise + +.. math:: + + dst.x = src.x / (src.x \times src.x + src.y \times src.y + src.z \times src.z) + + dst.y = src.y / (src.x \times src.x + src.y \times src.y + src.z \times src.z) + + dst.z = src.z / (src.x \times src.x + src.y \times src.y + src.z \times src.z) + + dst.w = 1 + + +DIV - Divide + +.. math:: + + dst.x = \frac{src0.x}{src1.x} + + dst.y = \frac{src0.y}{src1.y} + + dst.z = \frac{src0.z}{src1.z} + + dst.w = \frac{src0.w}{src1.w} + + +DP2 - 2-component Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + + dst.y = src0.x \times src1.x + src0.y \times src1.y + + dst.z = src0.x \times src1.x + src0.y \times src1.y + + dst.w = src0.x \times src1.x + src0.y \times src1.y + + +TXL - Texture Lookup With LOD + + TBD + + +BRK - Break + + TBD + + +IF - If + + TBD + + +BGNFOR - Begin a For-Loop + + dst.x = floor(src.x) + dst.y = floor(src.y) + dst.z = floor(src.z) + + if (dst.y <= 0) + pc = [matching ENDFOR] + 1 + endif + + Note: The destination must be a loop register. + The source must be a constant register. + + Considered for cleanup / removal. + + +REP - Repeat + + TBD + + +ELSE - Else + + TBD + + +ENDIF - End If + + TBD + + +ENDFOR - End a For-Loop + + dst.x = dst.x + dst.z + dst.y = dst.y - 1.0 + + if (dst.y > 0) + pc = [matching BGNFOR instruction] + 1 + endif + + Note: The destination must be a loop register. + + Considered for cleanup / removal. + +ENDREP - End Repeat + + TBD + + +PUSHA - Push Address Register On Stack + + push(src.x) + push(src.y) + push(src.z) + push(src.w) + + Considered for cleanup / removal. + +POPA - Pop Address Register From Stack + + dst.w = pop() + dst.z = pop() + dst.y = pop() + dst.x = pop() + + Considered for cleanup / removal. + + +From GL_NV_gpu_program4 +^^^^^^^^^^^^^^^^^^^^^^^^ + +Support for these opcodes indicated by a special pipe capability bit (TBD). + +CEIL - Ceiling + +.. math:: + + dst.x = \lceil src.x\rceil + + dst.y = \lceil src.y\rceil + + dst.z = \lceil src.z\rceil + + dst.w = \lceil src.w\rceil + + +I2F - Integer To Float + +.. math:: + + dst.x = (float) src.x + + dst.y = (float) src.y + + dst.z = (float) src.z + + dst.w = (float) src.w + + +NOT - Bitwise Not + +.. math:: + + dst.x = ~src.x + + dst.y = ~src.y + + dst.z = ~src.z + + dst.w = ~src.w + + +TRUNC - Truncate + +.. math:: + + dst.x = trunc(src.x) + + dst.y = trunc(src.y) + + dst.z = trunc(src.z) + + dst.w = trunc(src.w) + + +SHL - Shift Left + +.. math:: + + dst.x = src0.x << src1.x + + dst.y = src0.y << src1.x + + dst.z = src0.z << src1.x + + dst.w = src0.w << src1.x + + +SHR - Shift Right + +.. math:: + + dst.x = src0.x >> src1.x + + dst.y = src0.y >> src1.x + + dst.z = src0.z >> src1.x + + dst.w = src0.w >> src1.x + + +AND - Bitwise And + +.. math:: + + dst.x = src0.x & src1.x + + dst.y = src0.y & src1.y + + dst.z = src0.z & src1.z + + dst.w = src0.w & src1.w + + +OR - Bitwise Or + +.. math:: + + dst.x = src0.x | src1.x + + dst.y = src0.y | src1.y + + dst.z = src0.z | src1.z + + dst.w = src0.w | src1.w + + +MOD - Modulus + +.. math:: + + dst.x = src0.x \bmod src1.x + + dst.y = src0.y \bmod src1.y + + dst.z = src0.z \bmod src1.z + + dst.w = src0.w \bmod src1.w + + +XOR - Bitwise Xor + +.. math:: + + dst.x = src0.x ^ src1.x + + dst.y = src0.y ^ src1.y + + dst.z = src0.z ^ src1.z + + dst.w = src0.w ^ src1.w + + +SAD - Sum Of Absolute Differences + +.. math:: + + dst.x = |src0.x - src1.x| + src2.x + + dst.y = |src0.y - src1.y| + src2.y + + dst.z = |src0.z - src1.z| + src2.z + + dst.w = |src0.w - src1.w| + src2.w + + +TXF - Texel Fetch + + TBD + + +TXQ - Texture Size Query + + TBD + + +CONT - Continue + + TBD + + +From GL_NV_geometry_program4 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +EMIT - Emit + + TBD + + +ENDPRIM - End Primitive + + TBD + + +From GLSL +^^^^^^^^^^ + + +BGNLOOP - Begin a Loop + + TBD + + +BGNSUB - Begin Subroutine + + TBD + + +ENDLOOP - End a Loop + + TBD + + +ENDSUB - End Subroutine + + TBD + + +NOP - No Operation + + Do nothing. + + +NRM4 - 4-component Vector Normalise + +.. math:: + + dst.x = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + + dst.y = \frac{src.y}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + + dst.z = \frac{src.z}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + + dst.w = \frac{src.w}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w} + + +ps_2_x +^^^^^^^^^^^^ + + +CALLNZ - Subroutine Call If Not Zero + + TBD + + +IFC - If + + TBD + + +BREAKC - Break Conditional + + TBD + + +Explanation of symbols used +------------------------------ + + +Functions +^^^^^^^^^^^^^^ + + + :math:`|x|` Absolute value of `x`. + + :math:`\lceil x \rceil` Ceiling of `x`. + + clamp(x,y,z) Clamp x between y and z. + (x < y) ? y : (x > z) ? z : x + + :math:`\lfloor x\rfloor` Floor of `x`. + + :math:`\log_2{x}` Logarithm of `x`, base 2. + + max(x,y) Maximum of x and y. + (x > y) ? x : y + + min(x,y) Minimum of x and y. + (x < y) ? x : y + + partialx(x) Derivative of x relative to fragment's X. + + partialy(x) Derivative of x relative to fragment's Y. + + pop() Pop from stack. + + :math:`x^y` `x` to the power `y`. + + push(x) Push x on stack. + + round(x) Round x. + + trunc(x) Truncate x, i.e. drop the fraction bits. + + +Keywords +^^^^^^^^^^^^^ + + + discard Discard fragment. + + dst First destination register. + + dst0 First destination register. + + pc Program counter. + + src First source register. + + src0 First source register. + + src1 Second source register. + + src2 Third source register. + + target Label of target instruction. + + +Other tokens +--------------- + + +Declaration Semantic +^^^^^^^^^^^^^^^^^^^^^^^^ + + + Follows Declaration token if Semantic bit is set. + + Since its purpose is to link a shader with other stages of the pipeline, + it is valid to follow only those Declaration tokens that declare a register + either in INPUT or OUTPUT file. + + SemanticName field contains the semantic name of the register being declared. + There is no default value. + + SemanticIndex is an optional subscript that can be used to distinguish + different register declarations with the same semantic name. The default value + is 0. + + The meanings of the individual semantic names are explained in the following + sections. + +TGSI_SEMANTIC_POSITION +"""""""""""""""""""""" + +Position, sometimes known as HPOS or WPOS for historical reasons, is the +location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z`` +are the Cartesian coordinates, and ``w`` is the homogenous coordinate and used +for the perspective divide, if enabled. + +As a vertex shader output, position should be scaled to the viewport. When +used in fragment shaders, position will --- + +XXX --- wait a minute. Should position be in [0,1] for x and y? + +XXX additionally, is there a way to configure the perspective divide? it's +accelerated on most chipsets AFAIK... + +Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can +be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``. + +XXX usually? can we solidify that? + +TGSI_SEMANTIC_COLOR +""""""""""""""""""" + +Colors are used to, well, color the primitives. Colors are always in +``(r, g, b, a)`` format. + +If alpha is not specified, it defaults to 1. + +TGSI_SEMANTIC_BCOLOR +"""""""""""""""""""" + +Back-facing colors are only used for back-facing polygons, and are only valid +in vertex shader outputs. After rasterization, all polygons are front-facing +and COLOR and BCOLOR end up occupying the same slots in the fragment, so +all BCOLORs effectively become regular COLORs in the fragment shader. + +TGSI_SEMANTIC_FOG +""""""""""""""""" + +The fog coordinate historically has been used to replace the depth coordinate +for generation of fog in dedicated fog blocks. Gallium, however, does not use +dedicated fog acceleration, placing it entirely in the fragment shader +instead. + +The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first +component matters when writing from the vertex shader; the driver will ensure +that the coordinate is in this format when used as a fragment shader input. + +TGSI_SEMANTIC_PSIZE +""""""""""""""""""" + +PSIZE, or point size, is used to specify point sizes per-vertex. It should +be in ``(p, n, x, f)`` format, where ``p`` is the point size, ``n`` is the minimum +size, ``x`` is the maximum size, and ``f`` is the fade threshold. + +XXX this is arb_vp. is this what we actually do? should double-check... + +When using this semantic, be sure to set the appropriate state in the +:ref:`rasterizer` first. + +TGSI_SEMANTIC_GENERIC +""""""""""""""""""""" + +Generic semantics are nearly always used for texture coordinate attributes, +in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds +of lookups, and ``q`` is the level-of-detail bias for biased sampling. + +These attributes are called "generic" because they may be used for anything +else, including parameters, texture generation information, or anything that +can be stored inside a four-component vector. + +TGSI_SEMANTIC_NORMAL +"""""""""""""""""""" + +Vertex normal; could be used to implement per-pixel lighting for legacy APIs +that allow mixing fixed-function and programmable stages. + +TGSI_SEMANTIC_FACE +"""""""""""""""""" + +FACE is the facing bit, to store the facing information for the fragment +shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive +when the fragment is front-facing, and negative when the component is +back-facing. + +TGSI_SEMANTIC_EDGEFLAG +"""""""""""""""""""""" + +XXX no clue diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d5f5c7bbba8..aa29dcb3947 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -358,6 +358,7 @@ struct cell_spu_function_info /** This is the object passed to spe_create_thread() */ +PIPE_ALIGN_TYPE(16, struct cell_init_info { unsigned id; @@ -370,7 +371,7 @@ struct cell_init_info uint *buffer_status; /**< points at cell_context->buffer_status */ struct cell_spu_function_info *spu_functions; -} ALIGN16_ATTRIB; +}); #endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 1498fb665a2..e402ed29220 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -89,7 +89,7 @@ struct cell_buffer_node; */ struct cell_buffer_list { - struct cell_fence fence ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_fence fence; struct cell_buffer_node *head; }; @@ -150,18 +150,18 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; - struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; - ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; int cur_batch; /**< which buffer is being filled w/ commands */ /** [4] to ensure 16-byte alignment for each status word */ - uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; /** Associated with each command/batch buffer is a list of pipe_buffers diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 2016b21a409..0a4da8ecc85 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -59,7 +59,7 @@ cell_map_constant_buffers(struct cell_context *sp) } } - draw_set_mapped_constant_buffer(sp->draw, + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], sp->constants[PIPE_SHADER_VERTEX]->size); } @@ -85,7 +85,7 @@ cell_unmap_constant_buffers(struct cell_context *sp) * * XXX should the element buffer be specified/bound with a separate function? */ -static boolean +static void cell_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -145,29 +145,27 @@ cell_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ cell_unmap_constant_buffers(sp); - - return TRUE; } -static boolean +static void cell_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return cell_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + cell_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } -static boolean +static void cell_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return cell_draw_elements(pipe, NULL, 0, mode, start, count); + cell_draw_elements(pipe, NULL, 0, mode, start, count); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index efc4f78364b..b723e794e71 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -66,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell ) vinfo->num_attribs = 0; /* we always want to emit vertex pos */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); @@ -82,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell ) break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0); #if 1 if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ src = 0; @@ -100,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index ab55a24bb64..f1e1dcb9eb0 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -331,7 +331,7 @@ cell_emit_state(struct cell_context *cell) const struct draw_context *const draw = cell->draw; struct cell_shader_info info; - info.num_outputs = draw_num_vs_outputs(draw); + info.num_outputs = draw_num_shader_outputs(draw); info.declarations = (uintptr_t) draw->vs.machine.Declarations; info.num_declarations = draw->vs.machine.NumDeclarations; info.instructions = (uintptr_t) draw->vs.machine.Instructions; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 5c0179d9546..55bd85bde2b 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,8 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; @@ -405,8 +404,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; break; @@ -418,8 +415,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; break; @@ -547,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 5ed330aa6ec..d2166a49016 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1681,7 +1681,7 @@ exec_instruction( } break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); @@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d ALIGN16_ATTRIB; + } d; unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i ALIGN16_ATTRIB; + } i; unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 86056799405..0ca92af248d 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ + PIPE_ALIGN_VAR(16) struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] - ALIGN16_ATTRIB; + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index ff3d609d258..98919c43ffc 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - struct cell_spu_function_info funcs ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 33767e7c51d..b18f4c22ef1 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, vector float *constants); +PIPE_ALIGN_TYPE(16, struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ @@ -107,10 +108,11 @@ struct spu_framebuffer uint zsize; /**< 0, 2 or 4 bytes per Z */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -} ALIGN16_ATTRIB; +}); /** per-texture level info */ +PIPE_ALIGN_TYPE(16, struct spu_texture_level { void *start; @@ -123,20 +125,22 @@ struct spu_texture_level vector signed int mask_s, mask_t, mask_r; /** texcoord clamp limits */ vector signed int max_s, max_t, max_r; -} ALIGN16_ATTRIB; +}); +PIPE_ALIGN_TYPE(16, struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; uint target; /**< PIPE_TEXTURE_x */ -} ALIGN16_ATTRIB; +}); /** * All SPU global/context state will be in a singleton object of this type: */ +PIPE_ALIGN_TYPE(16, struct spu_global { /** One-time init/constant info */ @@ -155,8 +159,8 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) tile_t ctile; + PIPE_ALIGN_VAR(16) tile_t ztile; /** Read depth/stencil tiles? */ boolean read_depth_stencil; @@ -165,8 +169,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; + PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -175,7 +179,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; /** Current fragment ops function */ spu_fragment_program_func fragment_program; @@ -187,7 +191,7 @@ struct spu_global /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; -} ALIGN16_ATTRIB; +}); extern struct spu_global spu; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5ffb7073abf..14987e3c3a2 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 03375d84a57..087963960df 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) static const qword +fetch_shuffle_data[5] = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword in[2 * 4]; /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index fbe5b34d397..3e9804bf8ee 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; + PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw, ASSERT_ALIGN16(draw->constants); machine->Consts = (float (*)[4]) draw->constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = inputs; + machine->Outputs = outputs; spu_vertex_fetch( draw, machine, elts, count ); @@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; + PIPE_ALIGN_VAR(16) unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + + MAX_VERTEX_SIZE]; struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) unsigned char +immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); void diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 37184eac7b1..46e4338d98a 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -44,11 +44,19 @@ static void failover_destroy( struct pipe_context *pipe ) } +void failover_fail_over( struct failover_context *failover ) +{ + failover->dirty = TRUE; + failover->mode = FO_SW; +} + -static boolean failover_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) +static void failover_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, + unsigned start, + unsigned count) { struct failover_context *failover = failover_context( pipe ); @@ -62,24 +70,22 @@ static boolean failover_draw_elements( struct pipe_context *pipe, /* Try hardware: */ if (failover->mode == FO_HW) { - if (!failover->hw->draw_elements( failover->hw, - indexBuffer, - indexSize, - prim, - start, - count )) { - - failover->hw->flush( failover->hw, ~0, NULL ); - failover->mode = FO_SW; - } + failover->hw->draw_elements( failover->hw, + indexBuffer, + indexSize, + prim, + start, + count ); } /* Possibly try software: */ if (failover->mode == FO_SW) { - if (failover->dirty) + if (failover->dirty) { + failover->hw->flush( failover->hw, ~0, NULL ); failover_state_emit( failover ); + } failover->sw->draw_elements( failover->sw, indexBuffer, @@ -94,15 +100,13 @@ static boolean failover_draw_elements( struct pipe_context *pipe, */ failover->sw->flush( failover->sw, ~0, NULL ); } - - return TRUE; } -static boolean failover_draw_arrays( struct pipe_context *pipe, +static void failover_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return failover_draw_elements(pipe, NULL, 0, prim, start, count); + failover_draw_elements(pipe, NULL, 0, prim, start, count); } static unsigned int diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h index a8ce997a1f6..533122b69da 100644 --- a/src/gallium/drivers/failover/fo_winsys.h +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -36,10 +36,13 @@ struct pipe_context; +struct failover_context; struct pipe_context *failover_create( struct pipe_context *hw, struct pipe_context *sw ); +void failover_fail_over( struct failover_context *failover ); + #endif /* FO_WINSYS_H */ diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c index effeba12972..669964770d4 100644 --- a/src/gallium/drivers/i915/i915_buffer.c +++ b/src/gallium/drivers/i915/i915_buffer.c @@ -111,6 +111,7 @@ i915_buffer_unmap(struct pipe_screen *screen, { struct i915_buffer *buf = i915_buffer(buffer); assert(!buf->ibuf); + (void) buf; } static void diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 94c8aee30fe..89feeade756 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -45,7 +45,7 @@ */ -static boolean +static void i915_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -84,7 +84,7 @@ i915_draw_range_elements(struct pipe_context *pipe, } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, i915->current.constants[PIPE_SHADER_VERTEX], (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 4 * sizeof(float))); @@ -106,27 +106,25 @@ i915_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } - - return TRUE; } -static boolean +static void i915_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) { - return i915_draw_range_elements(pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - prim, start, count); + i915_draw_range_elements(pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count); } -static boolean +static void i915_draw_arrays(struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return i915_draw_elements(pipe, NULL, 0, prim, start, count); + i915_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index f7ebfadd593..0fab6e1bc36 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -58,10 +58,10 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; -/* + /* case PIPE_TEX_WRAP_MIRRORED_REPEAT: return TEXCOORDMODE_MIRROR; -*/ + */ default: return TEXCOORDMODE_WRAP; } @@ -74,8 +74,6 @@ static unsigned translate_img_filter( unsigned filter ) return FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return FILTER_ANISOTROPIC; default: assert(0); return FILTER_NEAREST; @@ -221,6 +219,9 @@ i915_create_sampler_state(struct pipe_context *pipe, minFilt = translate_img_filter( sampler->min_img_filter ); magFilt = translate_img_filter( sampler->mag_img_filter ); + if (sampler->max_anisotropy > 1.0) + minFilt = magFilt = FILTER_ANISOTROPIC; + if (sampler->max_anisotropy > 2.0) { cso->state[0] |= SS2_MAX_ANISO_4; } diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 178d4e8781d..03dd5091a61 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -84,7 +84,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* pos */ - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; @@ -101,21 +101,21 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* primary color */ if (colors[0]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_COLOR; } /* secondary color */ if (colors[1]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; } /* fog coord, not fog blend factor */ if (fog) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; } @@ -125,7 +125,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) uint hwtc; if (texCoords[i]) { hwtc = TEXCOORDFMT_4D; - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } else { diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 58d9e56df27..d67a1a62633 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -83,19 +83,19 @@ compile_clip_prog( struct brw_context *brw, c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE; - if (c.key.output_color0) + if (c.key.output_color0 != BRW_OUTPUT_NOT_PRESENT) c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE; - if (c.key.output_color1) + if (c.key.output_color1 != BRW_OUTPUT_NOT_PRESENT) c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE; - if (c.key.output_bfc0) + if (c.key.output_bfc0 != BRW_OUTPUT_NOT_PRESENT) c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE; - if (c.key.output_bfc1) + if (c.key.output_bfc1 != BRW_OUTPUT_NOT_PRESENT) c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE; - if (c.key.output_edgeflag) + if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT) c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; if (BRW_IS_IGDNG(brw)) @@ -182,7 +182,6 @@ upload_clip_prog(struct brw_context *brw) */ /* CACHE_NEW_VS_PROG */ key.nr_attrs = brw->vs.prog_data->nr_outputs; - key.output_edgeflag = brw->vs.prog_data->output_edgeflag; /* PIPE_NEW_VS */ key.output_hpos = vs->output_hpos; @@ -190,6 +189,7 @@ upload_clip_prog(struct brw_context *brw) key.output_color1 = vs->output_color1; key.output_bfc0 = vs->output_bfc0; key.output_bfc1 = vs->output_bfc1; + key.output_edgeflag = vs->output_edgeflag; /* PIPE_NEW_CLIP */ key.nr_userclip = brw->curr.ucp.nr; diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 56e78074000..8c006bb95b2 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -120,6 +120,13 @@ #define BRW_MAX_CURBE (32*16) + +/* Need a value to say a particular vertex shader output isn't + * present. Limits us to 63 outputs currently. + */ +#define BRW_OUTPUT_NOT_PRESENT ((1<<6)-1) + + struct brw_context; struct brw_depth_stencil_state { @@ -335,8 +342,6 @@ struct brw_vs_prog_data { GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ - GLuint output_edgeflag; - GLboolean writes_psiz; /* Used for calculating urb partitions: diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h index 77d402d35e6..ba5b109c483 100644 --- a/src/gallium/drivers/i965/brw_disasm.h +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -23,6 +23,8 @@ #ifndef BRW_DISASM_H #define BRW_DISASM_H +#include <stdio.h> + struct brw_instruction; int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 852fd229828..ea8d39adaf0 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -176,7 +176,7 @@ try_draw_range_elements(struct brw_context *brw, } -static boolean +static void brw_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, @@ -228,29 +228,27 @@ brw_draw_range_elements(struct pipe_context *pipe, ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); assert(ret == 0); } - - return TRUE; } -static boolean +static void brw_draw_elements(struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, unsigned mode, unsigned start, unsigned count) { - return brw_draw_range_elements( pipe, index_buffer, - index_size, - 0, 0xffffffff, - mode, - start, count ); + brw_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + mode, + start, count ); } -static boolean +static void brw_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return brw_draw_elements(pipe, NULL, 0, mode, start, count); + brw_draw_elements(pipe, NULL, 0, mode, start, count); } diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 4fe7b6acc16..00d8eaccbc4 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -860,7 +860,7 @@ void brw_land_fwd_jump(struct brw_compile *p, jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index 211be881789..452e1e89f93 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -114,18 +114,18 @@ static void color_clear(struct brw_context *brw, const float *rgba ) { enum pipe_error ret; - unsigned value; + union util_color value; util_pack_color( rgba, bsurface->base.format, &value ); if (bsurface->cpp == 2) - value |= value << 16; + value.ui |= value.ui << 16; - ret = try_clear( brw, bsurface, value ); + ret = try_clear( brw, bsurface, value.ui ); if (ret != 0) { brw_context_flush( brw ); - ret = try_clear( brw, bsurface, value ); + ret = try_clear( brw, bsurface, value.ui ); assert( ret == 0 ); } } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index 6b03094f502..5d4e5025f97 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -3,6 +3,7 @@ #include "pipe/p_state.h" #include "brw_context.h" +#include "brw_debug.h" /** * called from intelDrawBuffer() @@ -51,8 +52,14 @@ static void brw_set_viewport_state( struct pipe_context *pipe, struct brw_context *brw = brw_context(pipe); brw->curr.viewport = *viewport; - brw->curr.ccv.min_depth = 0.0; /* XXX: near */ - brw->curr.ccv.max_depth = 1.0; /* XXX: far */ + brw->curr.ccv.min_depth = viewport->scale[2] * -1.0 + viewport->translate[2]; + brw->curr.ccv.max_depth = viewport->scale[2] * 1.0 + viewport->translate[2]; + + if (0) + debug_printf("%s depth range %f .. %f\n", + __FUNCTION__, + brw->curr.ccv.min_depth, + brw->curr.ccv.max_depth); brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; } diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index 5ddc63f57ec..81712798a5d 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -48,8 +48,6 @@ static GLuint translate_img_filter( unsigned filter ) return BRW_MAPFILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return BRW_MAPFILTER_ANISOTROPIC; default: assert(0); return BRW_MAPFILTER_NEAREST; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 6c376e5e5d3..e389587f3e1 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -197,6 +197,13 @@ static void *brw_create_vs_state( struct pipe_context *pipe, vs->id = brw->program_id++; vs->has_flow_control = has_flow_control(&vs->info); + vs->output_hpos = BRW_OUTPUT_NOT_PRESENT; + vs->output_color0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_color1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_edgeflag = BRW_OUTPUT_NOT_PRESENT; + for (i = 0; i < vs->info.num_outputs; i++) { int index = vs->info.output_semantic_index[i]; switch (vs->info.output_semantic_name[i]) { diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 714def5046d..8a16205d2f6 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -79,18 +79,12 @@ static void release_tmps( struct brw_vs_compile *c ) static boolean is_position_output( struct brw_vs_compile *c, unsigned vs_output ) { - struct brw_vertex_shader *vs = c->vp; - - if (vs_output == c->prog_data.output_edgeflag) { - return FALSE; - } - else { - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; - return (semantic == TGSI_SEMANTIC_POSITION && - index == 0); - } + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); } @@ -98,23 +92,16 @@ static boolean find_output_slot( struct brw_vs_compile *c, unsigned vs_output, unsigned *fs_input_slot ) { - struct brw_vertex_shader *vs = c->vp; + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; - if (vs_output == c->prog_data.output_edgeflag) { - *fs_input_slot = c->key.fs_signature.nr_inputs; - return TRUE; - } - else { - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - unsigned i; - - for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { - if (c->key.fs_signature.input[i].semantic == semantic && + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && c->key.fs_signature.input[i].semantic_index == index) { - *fs_input_slot = i; - return TRUE; - } + *fs_input_slot = i; + return TRUE; } } diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 7e57d0306bc..8f983a60ae8 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -691,7 +691,7 @@ static void emit_xpd( struct brw_compile *p, { GLuint i; - assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X); + assert((mask & BRW_WRITEMASK_W) != BRW_WRITEMASK_W); for (i = 0 ; i < 3; i++) { if (mask & (1<<i)) { diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index b975182e7bd..f9063d90fb1 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -45,7 +45,7 @@ identity_destroy(struct pipe_context *_pipe) free(id_pipe); } -static boolean +static void identity_draw_arrays(struct pipe_context *_pipe, unsigned prim, unsigned start, @@ -54,13 +54,13 @@ identity_draw_arrays(struct pipe_context *_pipe, struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - return pipe->draw_arrays(pipe, - prim, - start, - count); + pipe->draw_arrays(pipe, + prim, + start, + count); } -static boolean +static void identity_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -73,15 +73,15 @@ identity_draw_elements(struct pipe_context *_pipe, struct pipe_context *pipe = id_pipe->pipe; struct pipe_buffer *indexBuffer = id_buffer->buffer; - return pipe->draw_elements(pipe, - indexBuffer, - indexSize, - prim, - start, - count); + pipe->draw_elements(pipe, + indexBuffer, + indexSize, + prim, + start, + count); } -static boolean +static void identity_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -96,14 +96,14 @@ identity_draw_range_elements(struct pipe_context *_pipe, struct pipe_context *pipe = id_pipe->pipe; struct pipe_buffer *indexBuffer = id_buffer->buffer; - return pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, - minIndex, - maxIndex, - mode, - start, - count); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, + minIndex, + maxIndex, + mode, + start, + count); } static struct pipe_query * diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index e038a5229e5..7c6e46006b9 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -50,7 +50,6 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample_c.c \ lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 0c3f00fd58f..72d9f39658f 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -59,27 +59,16 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.5 or greater. LLVM 2.6 is preferred. + - LLVM 2.6. - On Debian based distributions do: + For Linux, on a recent Debian based distribution do: aptitude install llvm-dev - There is a typo in one of the llvm 2.5 headers, that may cause compilation - errors. To fix it apply the change: - - --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 - +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 - @@ -831,7 +831,7 @@ - template<typename T> - inline T **unwrap(LLVMValueRef *Vals, unsigned Length) { - #if DEBUG - - for (LLVMValueRef *I = Vals, E = Vals + Length; I != E; ++I) - + for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I) - cast<T>(*I); - #endif - return reinterpret_cast<T**>(Vals); - + For Windows download pre-built MSVC 9.0 or MinGW binaries from + http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment + variable to the extracted path. + - scons (optional) - udis86, http://udis86.sourceforge.net/ (optional): @@ -95,9 +84,9 @@ Requirements Building ======== -To build everything invoke scons as: +To build everything on Linux invoke scons as: - scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false -k + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=xlib dri=false Alternatively, you can build it with GNU make, if you prefer, by invoking it as @@ -105,12 +94,15 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as but the rest of these instructions assume that scons is used. +For windows is everything the except except the winsys: + + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=gdi dri=false Using ===== -Building will create a drop-in alternative for libGL.so. To use it set the -environment variables: +On Linux, building will create a drop-in alternative for libGL.so. To use it +set the environment variables: export LD_LIBRARY_PATH=$PWD/build/linux-x86_64-debug/lib:$LD_LIBRARY_PATH @@ -121,6 +113,11 @@ or For performance evaluation pass debug=no to scons, and use the corresponding lib directory without the "-debug" suffix. +On Windows, building will create a drop-in alternative for opengl32.dll. To use +it put it in the same directory as the application. It can also be used by +replacing the native ICD driver, but it's quite an advanced usage, so if you +need to ask, don't even try it. + Unit testing ============ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 3bd2e700138..6bb545a501f 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'): env.Tool('udis86') +env.Append(CPPPATH = ['.']) + env.CodeGenerate( target = 'lp_tile_soa.c', script = 'lp_tile_soa.py', @@ -64,7 +66,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample_c.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', @@ -74,21 +75,19 @@ llvmpipe = env.ConvenienceLibrary( env = env.Clone() -env.Prepend(LIBS = [llvmpipe] + auxiliaries) +env.Prepend(LIBS = [llvmpipe] + gallium) -env.Program( - target = 'lp_test_format', - source = ['lp_test_format.c', 'lp_test_main.c'], -) +tests = [ + 'format', + 'blend', + 'conv', +] -env.Program( - target = 'lp_test_blend', - source = ['lp_test_blend.c', 'lp_test_main.c'], -) - -env.Program( - target = 'lp_test_conv', - source = ['lp_test_conv.c', 'lp_test_main.c'], -) +for test in tests: + target = env.Program( + target = 'lp_test_' + test, + source = ['lp_test_' + test + '.c', 'lp_test_main.c'], + ) + env.InstallProgram(target) Export('llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index d14f468ba93..ced7b9c11d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, enum lp_build_blend_swizzle { LP_BUILD_BLEND_SWIZZLE_RGBA = 0, - LP_BUILD_BLEND_SWIZZLE_AAAA = 1, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1 }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index dcc25fbff86..25c10af29f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -47,7 +47,7 @@ */ enum lp_build_flow_construct_kind { lP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP, + LP_BUILD_FLOW_SKIP }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index 5836e0173f9..10e82f120bb 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -130,7 +130,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); - // UIToFP can't be expressed in SSE2 + /* UIToFP can't be expressed in SSE2 */ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); if (normalized) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp index d3f78c06d92..6e79438ead0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -59,3 +59,17 @@ LLVMInitializeNativeTarget(void) #endif + + +/* + * Hack to allow the linking of release LLVM static libraries on a debug build. + * + * See also: + * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d + */ +#if defined(_MSC_VER) && defined(_DEBUG) +#include <crtdefs.h> +extern "C" { + _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} +} +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c index af70ddc6ab9..9003e108c1c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -69,8 +69,8 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->min_img_filter = sampler->min_img_filter; state->min_mip_filter = sampler->min_mip_filter; state->mag_img_filter = sampler->mag_img_filter; - if(sampler->compare_mode) { - state->compare_mode = sampler->compare_mode; + state->compare_mode = sampler->compare_mode; + if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { state->compare_func = sampler->compare_func; } state->normalized_coords = sampler->normalized_coords; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 47b68b71e25..5ee8d556a68 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -488,7 +488,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef res; unsigned chan; - if(!bld->static_state->compare_mode) + if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) return; /* TODO: Compare before swizzling, to avoid redundant computations */ @@ -577,7 +577,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: if(lp_format_is_rgba8(bld.format_desc)) lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); else diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index a67c70ff25a..fb1eda4423b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -321,7 +321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, { const uint unit = inst->Src[1].Register.Index; LLVMValueRef lodbias; - LLVMValueRef oow; + LLVMValueRef oow = NULL; LLVMValueRef coords[3]; unsigned num_coords; unsigned i; @@ -361,6 +361,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, if (projected) coords[i] = lp_build_mul(&bld->base, coords[i], oow); } + for (i = num_coords; i < 3; i++) { + coords[i] = bld->base.undef; + } bld->sampler->emit_fetch_texel(bld->sampler, bld->base.builder, @@ -446,7 +449,12 @@ emit_instruction( { unsigned chan_index; LLVMValueRef src0, src1, src2; - LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + LLVMValueRef tmp0, tmp1, tmp2; + LLVMValueRef tmp3 = NULL; + LLVMValueRef tmp4 = NULL; + LLVMValueRef tmp5 = NULL; + LLVMValueRef tmp6 = NULL; + LLVMValueRef tmp7 = NULL; LLVMValueRef res; LLVMValueRef dst0[NUM_CHANNELS]; @@ -1310,7 +1318,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: /* deprecated? */ assert(0); return 0; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index f381156d00f..aaa675aec77 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -140,6 +140,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); unsigned i; + /* check if any of the bound drawing surfaces are this texture */ if(llvmpipe->dirty_render_cache) { for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { if(llvmpipe->framebuffer.cbufs[i] && @@ -150,6 +151,13 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, llvmpipe->framebuffer.zsbuf->texture == texture) return PIPE_REFERENCED_FOR_WRITE; } + + /* check if any of the tex_cache textures are this texture */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (llvmpipe->tex_cache[i] && + llvmpipe->tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { if (llvmpipe->vertex_tex_cache[i] && llvmpipe->vertex_tex_cache[i]->texture == texture) @@ -248,22 +256,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen); - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i]; - llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i]; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; - llvmpipe->tgsi.frag_samplers[i].cache = llvmpipe->tex_cache[i]; - llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i]; - } - /* * Create drawing context and plug our rendering stage into it. */ @@ -271,10 +263,7 @@ llvmpipe_create( struct pipe_screen *screen ) if (!llvmpipe->draw) goto fail; - draw_texture_samplers(llvmpipe->draw, - PIPE_MAX_VERTEX_SAMPLERS, - (struct tgsi_sampler **) - llvmpipe->tgsi.vert_samplers_list); + /* FIXME: devise alternative to draw_texture_samplers */ if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index ca50585896e..426d6eb4a12 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -115,14 +115,6 @@ struct llvmpipe_context { unsigned line_stipple_counter; - /** TGSI exec things */ - struct { - struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; - } tgsi; - /** The primitive drawing context */ struct draw_context *draw; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 2299566c665..c152b4413fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,11 +45,11 @@ -boolean +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); + llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); } @@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -103,7 +103,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_arrays(draw, mode, start, count); /* - * unmap vertex/index buffers - will cause draw module to flush + * unmap vertex/index buffers */ for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); @@ -112,24 +112,28 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); /* Note: leave drawing surfaces mapped */ lp->dirty_render_cache = TRUE; - - return TRUE; } -boolean +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + llvmpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index bce3baec164..4ef0783f3e2 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -79,25 +79,22 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_context */ { - LLVMTypeRef elem_types[5]; + LLVMTypeRef elem_types[4]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ - elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ - elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ - elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ + elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, screen->target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, samplers, - screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, - screen->target, context_type, 2); + screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, - screen->target, context_type, 3); + screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, screen->target, context_type, LP_JIT_CONTEXT_TEXTURES_INDEX); @@ -109,24 +106,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->context_ptr_type = LLVMPointerType(context_type, 0); } - /* fetch_texel - */ - { - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[3]; - LLVMValueRef fetch_texel; - - ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - arg_types[1] = LLVMInt32Type(); /* unit */ - arg_types[2] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* store */ - - fetch_texel = lp_declare_intrinsic(screen->module, "fetch_texel", - ret_type, arg_types, Elements(arg_types)); - - LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_fetch_texel_soa); - } - #ifdef DEBUG LLVMDumpModule(screen->module); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 58f716ede29..277b690c02c 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -41,7 +41,6 @@ #include "pipe/p_state.h" -struct tgsi_sampler; struct llvmpipe_screen; @@ -78,8 +77,6 @@ struct lp_jit_context { const float *constants; - struct tgsi_sampler **samplers; - float alpha_ref_value; /* FIXME: store (also?) in floats */ @@ -92,16 +89,13 @@ struct lp_jit_context #define lp_jit_context_constants(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 0, "constants") -#define lp_jit_context_samplers(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "samplers") - #define lp_jit_context_alpha_ref_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "alpha_ref_value") + lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value") #define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 3, "blend_color") + lp_build_struct_get(_builder, _ptr, 2, "blend_color") -#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 +#define LP_JIT_CONTEXT_TEXTURES_INDEX 3 #define lp_jit_context_textures(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") @@ -118,12 +112,6 @@ typedef void void *color, void *depth); -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ); - - void lp_jit_screen_cleanup(struct llvmpipe_screen *screen); diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index 4abff4ecccc..e8e2e2524ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -128,6 +128,7 @@ lp_vbuf_unmap_vertices(struct vbuf_render *vbr, { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + (void) cvbr; /* do nothing */ } diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h index 7eb05de77a1..c3a48700a4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ b/src/gallium/drivers/llvmpipe/lp_quad.h @@ -31,6 +31,7 @@ #ifndef LP_QUAD_H #define LP_QUAD_H +#include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "tgsi/tgsi_exec.h" @@ -83,7 +84,7 @@ struct quad_header_inout struct quad_header_output { /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + PIPE_ALIGN_VAR(16) float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; }; @@ -92,9 +93,9 @@ struct quad_header_output */ struct quad_interp_coef { - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b18f17c0cd3..0b2d3a28014 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -117,7 +117,7 @@ struct setup_context { /** * Execute fragment shader for the four fragments in the quad. */ -ALIGN_STACK +PIPE_ALIGN_STACK static void shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quads[], @@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, uint8_t *tile; uint8_t *color; void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 8e0c773a634..e16793186be 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -56,7 +56,6 @@ #define LP_NEW_QUERY 0x4000 -struct tgsi_sampler; struct vertex_info; struct pipe_context; struct llvmpipe_context; @@ -197,14 +196,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp); void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ); -boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); -boolean llvmpipe_draw_elements(struct pipe_context *pipe, +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index b2e75d3b14e..a94cd05ef20 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_debug_dump.h" +#include "draw/draw_context.h" #include "lp_screen.h" #include "lp_context.h" #include "lp_state.h" @@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->blend == blend) + return; + + draw_flush(llvmpipe->draw); + llvmpipe->blend = blend; llvmpipe->dirty |= LP_NEW_BLEND; @@ -69,6 +75,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); unsigned i, j; + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; + + draw_flush(llvmpipe->draw); + memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); if(!llvmpipe->jit_context.blend_color) @@ -99,7 +110,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + if (llvmpipe->depth_stencil == depth_stencil) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->depth_stencil = depth_stencil; if(llvmpipe->depth_stencil) llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index e703964aaa8..6c1ef6bc42d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -66,7 +66,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* compute vertex layout now */ const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); + const uint num = draw_current_shader_outputs(llvmpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -116,13 +116,13 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, + src = draw_find_shader_output(llvmpipe->draw, lpfs->info.input_semantic_name[i], lpfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, + llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (llvmpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, @@ -192,36 +192,6 @@ compute_cliprect(struct llvmpipe_context *lp) } -static void -update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) -{ - unsigned i; - - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i]; - llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i]; - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - } - - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] ); - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.frag_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - } - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->tex_cache[i] ); - } - - llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; -} - /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ @@ -237,8 +207,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) } if (llvmpipe->dirty & (LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) - update_tgsi_samplers( llvmpipe ); + LP_NEW_TEXTURE)) { + /* TODO */ + } if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index d129918fe2d..9f4bbef73fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -453,8 +453,8 @@ generate_fragment(struct llvmpipe_context *lp, debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); debug_printf(" .mag_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode) - debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); } @@ -550,13 +550,8 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); -#if 0 - /* C texture sampling */ - sampler = lp_c_sampler_soa_create(context_ptr); -#else /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); -#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -673,7 +668,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->fs = (struct lp_fragment_shader *) fs; + if (llvmpipe->fs == fs) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->fs = fs; llvmpipe->dirty |= LP_NEW_FS; } @@ -688,6 +688,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) struct lp_fragment_shader_variant *variant; assert(fs != llvmpipe->fs); + (void) llvmpipe; variant = shader->variants; while(variant) { @@ -722,8 +723,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); - if(shader == PIPE_SHADER_VERTEX) - draw_flush(llvmpipe->draw); + draw_flush(llvmpipe->draw); /* note: reference counting */ pipe_buffer_reference(&llvmpipe->constants[shader], constants); @@ -733,7 +733,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, } if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, + data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 4561c6b8456..aa3b5a3f91e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -41,14 +41,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, } void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(llvmpipe->draw, setup); + draw_set_rasterizer_state(llvmpipe->draw, rasterizer); - llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + llvmpipe->rasterizer = rasterizer; llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index ba970cac985..e37ff04f3df 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -51,6 +51,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, struct llvmpipe_context *lp = llvmpipe_context(pipe); uint i; + draw_flush(lp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 8a761648e7e..884e3878e62 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -70,14 +70,18 @@ fail: void -llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs; - llvmpipe->vs = (const struct lp_vertex_shader *)vs; + if (llvmpipe->vs == vs) + return; - draw_bind_vertex_shader(llvmpipe->draw, - (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL)); + draw_bind_vertex_shader(llvmpipe->draw, + vs ? vs->draw_data : NULL); + + llvmpipe->vs = vs; llvmpipe->dirty |= LP_NEW_VS; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 29fff91981a..6c29e8d8ace 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,7 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -531,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -596,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 968c7a2d4aa..c1abee424c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,7 +142,7 @@ add_conv_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -230,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; @@ -330,7 +330,7 @@ test_one(unsigned verbose, fprintf(stderr, "conv.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n"); firsttime = FALSE; - //abort(); + /* abort(); */ } } diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 23ea9ebbe7d..2b258f1052e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -199,7 +199,7 @@ add_store_rgba_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h index 9fa6c368125..05fded78e16 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.h @@ -115,7 +115,7 @@ extern const struct llvmpipe_cached_tex_tile * lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, union tex_tile_address addr ); -static INLINE const union tex_tile_address +static INLINE union tex_tile_address tex_tile_address( unsigned x, unsigned y, unsigned z, diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 9ad1bde9565..cb59a94464a 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -31,64 +31,11 @@ #include <llvm-c/Core.h> -#include "tgsi/tgsi_exec.h" - -struct llvmpipe_tex_tile_cache; struct lp_sampler_static_state; /** - * Subclass of tgsi_sampler - */ -struct lp_shader_sampler -{ - struct tgsi_sampler base; /**< base class */ - - unsigned processor; - - /* For lp_get_samples_2d_linear_POT: - */ - unsigned xpot; - unsigned ypot; - unsigned level; - - const struct pipe_texture *texture; - const struct pipe_sampler_state *sampler; - - struct llvmpipe_tex_tile_cache *cache; -}; - - - -static INLINE struct lp_shader_sampler * -lp_shader_sampler(const struct tgsi_sampler *sampler) -{ - return (struct lp_shader_sampler *) sampler; -} - - - -extern void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - - -/** - * Texture sampling code generator that just calls lp_get_samples C function - * for the actual sampling computation. - * - * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. - */ -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr); - - -/** * Pure-LLVM texture sampling code generator. * * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c deleted file mode 100644 index 0d01c07fb5e..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ /dev/null @@ -1,1713 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture sampling - * - * Authors: - * Brian Paul - */ - -#include "lp_context.h" -#include "lp_quad.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_sample.h" -#include "lp_tex_cache.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - - -/* - * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes - * see 1-pixel bands of improperly weighted linear-filtered textures. - * The tests/texwrap.c demo is a good test. - * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. - * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). - */ -#define FRAC(f) ((f) - util_ifloor(f)) - - -/** - * Linear interpolation macro - */ -static INLINE float -lerp(float a, float v0, float v1) -{ - return v0 + a * (v1 - v0); -} - - -/** - * Do 2D/biliner interpolation of float values. - * v00, v10, v01 and v11 are typically four texture samples in a square/box. - * a and b are the horizontal and vertical interpolants. - * It's important that this function is inlined when compiled with - * optimization! If we find that's not true on some systems, convert - * to a macro. - */ -static INLINE float -lerp_2d(float a, float b, - float v00, float v10, float v01, float v11) -{ - const float temp0 = lerp(a, v00, v10); - const float temp1 = lerp(a, v01, v11); - return lerp(b, temp0, temp1); -} - - -/** - * As above, but 3D interpolation of 8 values. - */ -static INLINE float -lerp_3d(float a, float b, float c, - float v000, float v100, float v010, float v110, - float v001, float v101, float v011, float v111) -{ - const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); - const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); - return lerp(c, temp0, temp1); -} - - - -/** - * If A is a signed integer, A % B doesn't give the right value for A < 0 - * (in terms of texture repeat). Just casting to unsigned fixes that. - */ -#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) - - -/** - * Apply texture coord wrapping mode and return integer texture indexes - * for a vector of four texcoords (S or T or P). - * \param wrapMode PIPE_TEX_WRAP_x - * \param s the incoming texcoords - * \param size the texture image size - * \param icoord returns the integer texcoords - * \return integer texture index - */ -static INLINE void -nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - /* s limited to [0,1) */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); - } - return; - case PIPE_TEX_WRAP_CLAMP: - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= 0.0F) - icoord[ch] = 0; - else if (s[ch] >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] < min) - icoord[ch] = 0; - else if (s[ch] > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= min) - icoord[ch] = -1; - else if (s[ch] >= max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - const float u = fabsf(s[ch]); - if (u <= 0.0F) - icoord[ch] = 0; - else if (u >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = -1; - else if (u > max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - default: - assert(0); - } -} - - -/** - * Used to compute texel locations for linear sampling for four texcoords. - * \param wrapMode PIPE_TEX_WRAP_x - * \param s the texcoords - * \param size the texture image size - * \param icoord0 returns first texture indexes - * \param icoord1 returns second texture indexes (usually icoord0 + 1) - * \param w returns blend factor/weight between texture indexes - * \param icoord returns the computed integer texture coords - */ -static INLINE void -linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - for (ch = 0; ch < 4; ch++) { - float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], min, max); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - u = u * size - 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords - * Only a subset of wrap modes supported. - */ -static INLINE void -nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch]); - icoord[ch]= CLAMP(i, 0, (int) size-1); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); - } - return; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords. - * Only a subset of wrap modes supported. - */ -static INLINE void -linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord1[ch] > (int) size - 1) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break; - default: - assert(0); - } -} - - -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) -{ - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } - } - - *newS = ( sc / ma + 1.0F ) * 0.5F; - *newT = ( tc / ma + 1.0F ) * 0.5F; - - return face; -} - - -/** - * Examine the quad's texture coordinates to compute the partial - * derivatives w.r.t X and Y, then compute lambda (level of detail). - * - * This is only done for fragment shaders, not vertex shaders. - */ -static float -compute_lambda(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - float rho, lambda; - - if (samp->processor == TGSI_PROCESSOR_VERTEX) - return lodbias; - - assert(sampler->normalized_coords); - - assert(s); - { - float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; - float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width0; - } - if (t) { - float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; - float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; - float max; - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height0; - rho = MAX2(rho, max); - } - if (p) { - float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; - float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; - float max; - dpdx = fabsf(dpdx); - dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth0; - rho = MAX2(rho, max); - } - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; -} - - -/** - * Do several things here: - * 1. Compute lambda from the texcoords, if needed - * 2. Determine if we're minifying or magnifying - * 3. If minifying, choose mipmap levels - * 4. Return image filter to use within mipmap images - * \param level0 Returns first mipmap level to sample from - * \param level1 Returns second mipmap level to sample from - * \param levelBlend Returns blend factor between levels, in [0,1] - * \param imgFilter Returns either the min or mag filter, depending on lambda - */ -static void -choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - unsigned *level0, unsigned *level1, float *levelBlend, - unsigned *imgFilter) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->min_img_filter != sampler->mag_img_filter) { - /* non-mipmapped texture, but still need to determine if doing - * minification or magnification. - */ - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - if (lambda <= 0.0) { - *imgFilter = sampler->mag_img_filter; - } - else { - *imgFilter = sampler->min_img_filter; - } - } - else { - *imgFilter = sampler->mag_img_filter; - } - } - else { - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - - if (lambda <= 0.0) { /* XXX threshold depends on the filter */ - /* magnifying */ - *imgFilter = sampler->mag_img_filter; - *level0 = *level1 = 0; - } - else { - /* minifying */ - *imgFilter = sampler->min_img_filter; - - /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - /* Nearest mipmap level */ - const int lvl = (int) (lambda + 0.5); - *level0 = - *level1 = CLAMP(lvl, 0, (int) texture->last_level); - } - else { - /* Linear interpolation between mipmap levels */ - const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); - *levelBlend = FRAC(lambda); /* blending weight between levels */ - } - } - } -} - - -/** - * Get a texel from a texture, using the texture tile cache. - * - * \param face the cube face in 0..5 - * \param level the mipmap level - * \param x the x coord of texel within 2D image - * \param y the y coord of texel within 2D image - * \param z which slice of a 3D texture - * \param rgba the quad to put the texel/color into - * \param j which element of the rgba quad to write to - * - * XXX maybe move this into lp_tile_cache.c and merge with the - * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... - */ -static void -get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, - const uint8_t *out[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - out[0] = &tile->color[y ][x ][0]; - out[1] = &tile->color[y ][x+1][0]; - out[2] = &tile->color[y+1][x ][0]; - out[3] = &tile->color[y+1][x+1][0]; -} - -static INLINE const uint8_t * -get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - return &tile->color[y][x][0]; -} - - -static void -get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, - int x0, int y0, - int x1, int y1, - const uint8_t *out[4]) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - unsigned tx = (i & 1) ? x1 : x0; - unsigned ty = (i >> 1) ? y1 : y0; - - out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); - } -} - -static void -get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (x < 0 || x >= (int) u_minify(texture->width0, level) || - y < 0 || y >= (int) u_minify(texture->height0, level) || - z < 0 || z >= (int) u_minify(texture->depth0, level)) { - rgba[0][j] = sampler->border_color[0]; - rgba[1][j] = sampler->border_color[1]; - rgba[2][j] = sampler->border_color[2]; - rgba[3][j] = sampler->border_color[3]; - } - else { - const unsigned tx = x % TEX_TILE_SIZE; - const unsigned ty = y % TEX_TILE_SIZE; - const struct llvmpipe_cached_tex_tile *tile; - - tile = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, z, face, level)); - - rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]); - rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]); - rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]); - rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]); - if (0) - { - debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(texture->format)); - } - } -} - - -/** - * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' - * When we sampled the depth texture, the depth value was put into all - * RGBA channels. We look at the red channel here. - * \param rgba quad of (depth) texel values - * \param p texture 'P' components for four pixels in quad - * \param j which pixel in the quad to test [0..3] - */ -static INLINE void -shadow_compare(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE], - uint j) -{ - int k; - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k = p[j] < rgba[0][j]; - break; - case PIPE_FUNC_LEQUAL: - k = p[j] <= rgba[0][j]; - break; - case PIPE_FUNC_GREATER: - k = p[j] > rgba[0][j]; - break; - case PIPE_FUNC_GEQUAL: - k = p[j] >= rgba[0][j]; - break; - case PIPE_FUNC_EQUAL: - k = p[j] == rgba[0][j]; - break; - case PIPE_FUNC_NOTEQUAL: - k = p[j] != rgba[0][j]; - break; - case PIPE_FUNC_ALWAYS: - k = 1; - break; - case PIPE_FUNC_NEVER: - k = 0; - break; - default: - k = 0; - assert(0); - break; - } - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; - rgba[3][j] = 1.0F; -} - - -/** - * As above, but do four z/texture comparisons. - */ -static INLINE void -shadow_compare4(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE]) -{ - int j, k0, k1, k2, k3; - float val; - - /* compare four texcoords vs. four texture samples */ - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; - break; - case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; - break; - case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; - break; - case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; - break; - case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; - break; - case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; - break; - case PIPE_FUNC_ALWAYS: - k0 = k1 = k2 = k3 = 1; - break; - case PIPE_FUNC_NEVER: - k0 = k1 = k2 = k3 = 0; - break; - default: - k0 = k1 = k2 = k3 = 0; - assert(0); - break; - } - - /* convert four pass/fail values to an intensity in [0,1] */ - val = 0.25F * (k0 + k1 + k2 + k3); - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for (j = 0; j < 4; j++) { - rgba[0][j] = rgba[1][j] = rgba[2][j] = val; - rgba[3][j] = 1.0F; - } -} - - - -static void -lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */ - unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */ - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - float xw = u - (float)uflr; - float yw = v - (float)vflr; - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *tx[4]; - - - /* Can we fetch all four at once: - */ - if (x0 < xmax && y0 < ymax) - { - get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); - } - else - { - unsigned x1 = (x0 + 1) & (xpot - 1); - unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level, - x0, y0, x1, y1, tx); - } - - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw, yw, - ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]), - ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c])); - } - } -} - - -static void -lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int x0, y0; - const uint8_t *out; - - x0 = util_ifloor(u); - if (x0 < 0) - x0 = 0; - else if (x0 > xpot - 1) - x0 = xpot - 1; - - y0 = util_ifloor(v); - if (y0 < 0) - y0 = 0; - else if (y0 > ypot - 1) - y0 = ypot - 1; - - out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - int level0; - float lambda; - - lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - level0 = (int)lambda; - - if (lambda < 0.0) { - samp->level = 0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else if (level0 >= texture->last_level) { - samp->level = texture->last_level; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else { - float levelBlend = lambda - level0; - float rgba0[4][4]; - float rgba1[4][4]; - int c,j; - - samp->level = level0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba0 ); - - samp->level = level0+1; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba1 ); - - for (j = 0; j < QUAD_SIZE; j++) { - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); - } - } - } -} - -/** - * Common code for sampling 1D/2D/cube textures. - * Could probably extend for 3D... - */ -static void -lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const unsigned faces[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, - rgba2, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(sampler, rgba2, p, j); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - - /* XXX: This is incorrect -- will often end up with (x0 - * == x1 && y0 == y1), meaning that we fetch the same - * texel four times and linearly interpolate between - * identical values. The correct approach would be to - * call linear_texcoord again for the second level. - */ - x0[j] /= 2; - y0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static INLINE void -lp_get_samples_1d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - static const float tzero[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, tzero, NULL, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_2d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, t, p, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - /* get/map pipe_surfaces corresponding to 3D tex slices */ - unsigned level0, level1, j, imgFilter; - int width, height, depth; - float levelBlend; - const uint face = 0; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - depth = u_minify(texture->depth0, level0); - - assert(width > 0); - assert(height > 0); - assert(depth > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4], z[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - nearest_texcoord_4(sampler->wrap_r, p, depth, z); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - z[j] /= 2; - get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; - float xw[4], yw[4], zw[4]; /* interpolation weights */ - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - float tx0[4][4], tx1[4][4]; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - z0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - z1[j] /= 2; - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static void -lp_get_samples_cube(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - unsigned faces[QUAD_SIZE], j; - float ssss[4], tttt[4]; - for (j = 0; j < QUAD_SIZE; j++) { - faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); - } - lp_get_samples_2d_common(sampler, ssss, tttt, NULL, - lodbias, rgba, faces); -} - - -static void -lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - const uint face = 0; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - /* texture RECTS cannot be mipmapped */ - assert(level0 == level1); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); - nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - } - } - break; - default: - assert(0); - } -} - - -/** - * Error condition handler - */ -static INLINE void -lp_get_samples_null(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - int i,j; - - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - rgba[i][j] = 1.0; -} - -/** - * Called via tgsi_sampler::get_samples() when using a sampler for the - * first time. Determine the actual sampler function, link it in and - * call it. - */ -void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - /* Default to the 'undefined' case: - */ - tgsi_sampler->get_samples = lp_get_samples_null; - - if (!texture) { - assert(0); /* is this legal?? */ - goto out; - } - - if (!sampler->normalized_coords) { - assert (texture->target == PIPE_TEXTURE_2D); - tgsi_sampler->get_samples = lp_get_samples_rect; - goto out; - } - - switch (texture->target) { - case PIPE_TEXTURE_1D: - tgsi_sampler->get_samples = lp_get_samples_1d; - break; - case PIPE_TEXTURE_2D: - tgsi_sampler->get_samples = lp_get_samples_2d; - break; - case PIPE_TEXTURE_3D: - tgsi_sampler->get_samples = lp_get_samples_3d; - break; - case PIPE_TEXTURE_CUBE: - tgsi_sampler->get_samples = lp_get_samples_cube; - break; - default: - assert(0); - break; - } - - /* Do this elsewhere: - */ - samp->xpot = util_unsigned_logbase2( samp->texture->width0 ); - samp->ypot = util_unsigned_logbase2( samp->texture->height0 ); - - /* Try to hook in a faster sampler. Ultimately we'll have to - * code-generate these. Luckily most of this looks like it is - * orthogonal state within the sampler. - */ - if (texture->target == PIPE_TEXTURE_2D && - sampler->min_img_filter == sampler->mag_img_filter && - sampler->wrap_s == sampler->wrap_t && - sampler->compare_mode == FALSE && - sampler->normalized_coords) - { - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - samp->level = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT; - break; - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT; - break; - default: - break; - } - } - else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT; - break; - default: - break; - } - } - } - else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT; - break; - default: - break; - } - } - } - } - else if (0) { - _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", - texture->target, PIPE_TEXTURE_2D, - sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, - sampler->min_img_filter, sampler->mag_img_filter, - sampler->wrap_s, sampler->wrap_t, - sampler->compare_mode, FALSE, - sampler->normalized_coords, TRUE); - } - -out: - tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); -} - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -#include "lp_bld_type.h" -#include "lp_bld_intr.h" -#include "lp_bld_tgsi.h" - - -struct lp_c_sampler_soa -{ - struct lp_build_sampler_soa base; - - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -static void -lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) -{ - FREE(sampler); -} - - -static void -lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, - LLVMBuilderRef builder, - struct lp_type type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!sampler->samplers_ptr) - sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); - - if(!sampler->store_ptr) - sampler->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = sampler->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = sampler->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr) -{ - struct lp_c_sampler_soa *sampler; - - sampler = CALLOC_STRUCT(lp_c_sampler_soa); - if(!sampler) - return NULL; - - sampler->base.destroy = lp_c_sampler_soa_destroy; - sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; - sampler->context_ptr = context_ptr; - - return &sampler->base; -} - diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 040b01865dd..19d00b58d37 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -29,7 +29,7 @@ #define LP_TILE_SOA_H #include "pipe/p_compiler.h" -#include "tgsi/tgsi_exec.h" // for NUM_CHANNELS +#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ #ifdef __cplusplus diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h index 595481c2cbc..74b472b6531 100644 --- a/src/gallium/drivers/llvmpipe/lp_winsys.h +++ b/src/gallium/drivers/llvmpipe/lp_winsys.h @@ -35,7 +35,7 @@ #define LP_WINSYS_H -#include "pipe/p_compiler.h" // for boolean +#include "pipe/p_compiler.h" /* for boolean */ #include "pipe/p_format.h" diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h deleted file mode 100644 index 9c235080a55..00000000000 --- a/src/gallium/drivers/nouveau/nouveau_push.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef __NOUVEAU_PUSH_H__ -#define __NOUVEAU_PUSH_H__ - -#include "nouveau/nouveau_winsys.h" - -#ifndef NOUVEAU_PUSH_CONTEXT -#error undefined push context -#endif - -#define OUT_RING(data) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - (*pc->base.channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - memcpy(pc->base.channel->pushbuf->cur, (src), (size) * 4); \ - pc->base.channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING_NI(obj,mthd,size) do { \ - BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ -} while(0) - -static inline void -DO_FIRE_RING(struct nouveau_channel *chan, struct pipe_fence_handle **fence) -{ - nouveau_pushbuf_flush(chan, 0); - if (fence) - *fence = NULL; -} - -#define FIRE_RING(fence) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - DO_FIRE_RING(pc->base.channel, fence); \ -} while(0) - -#define OUT_RELOC(bo,data,flags,vor,tor) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, nouveau_bo(bo), \ - (data), 0, (flags), (vor), (tor)); \ -} while(0) - -/* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) - -/* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - pc->base.channel->vram->handle, \ - pc->base.channel->gart->handle); \ -} while(0) - -/* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) - -/* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) - -/* A reloc which'll recombine into a NV_DMA_METHOD packet header */ -#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \ - (flags), 0, 0); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#endif diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index e4cf91c005c..7ebc94ed6c7 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -31,7 +31,7 @@ nouveau_screen_bo_skel(struct pipe_screen *pscreen, struct nouveau_bo *bo, unsigned alignment, unsigned usage, unsigned size) { struct pipe_buffer *pb; - + pb = CALLOC(1, sizeof(struct pipe_buffer)+sizeof(struct nouveau_bo *)); if (!pb) { nouveau_bo_ref(NULL, &bo); @@ -127,8 +127,18 @@ nouveau_screen_bo_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map(bo, nouveau_screen_map_flags(usage)); if (ret) { debug_printf("map failed: %d\n", ret); @@ -143,11 +153,22 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned offset, unsigned length, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); uint32_t flags = nouveau_screen_map_flags(usage); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map_range(bo, offset, length, flags); if (ret) { + nouveau_bo_unmap(bo); if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY) debug_printf("map_range failed: %d\n", ret); return NULL; @@ -239,5 +260,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) void nouveau_screen_fini(struct nouveau_screen *screen) { + nouveau_channel_free(&screen->channel); } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index ebfc67ad1c1..a7927d88dfc 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -5,6 +5,9 @@ struct nouveau_screen { struct pipe_screen base; struct nouveau_device *device; struct nouveau_channel *channel; + + int (*pre_pipebuffer_map_callback) (struct pipe_screen *pscreen, + struct pipe_buffer *pb, unsigned usage); }; static inline struct nouveau_screen * diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 9aee9e49566..e844f6abb3d 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -3,41 +3,95 @@ #include "util/u_debug.h" +#ifdef DEBUG +#define DEBUG_NOUVEAU_STATEOBJ +#endif /* DEBUG */ + struct nouveau_stateobj_reloc { struct nouveau_bo *bo; - unsigned offset; - unsigned packet; + struct nouveau_grobj *gr; + uint32_t push_offset; + uint32_t mthd; - unsigned data; + uint32_t data; unsigned flags; unsigned vor; unsigned tor; }; +struct nouveau_stateobj_start { + struct nouveau_grobj *gr; + uint32_t mthd; + uint32_t size; + unsigned offset; +}; + struct nouveau_stateobj { struct pipe_reference reference; - unsigned *push; + struct nouveau_stateobj_start *start; struct nouveau_stateobj_reloc *reloc; - unsigned *cur; - unsigned cur_packet; + /* Common memory pool for data. */ + uint32_t *pool; + unsigned pool_cur; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + unsigned start_alloc; + unsigned reloc_alloc; + unsigned pool_alloc; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + unsigned total; /* includes begin_ring */ + unsigned cur; /* excludes begin_ring, offset from "cur_start" */ + unsigned cur_start; unsigned cur_reloc; }; +static INLINE void +so_dump(struct nouveau_stateobj *so) +{ + unsigned i, nr, total = 0; + + for (i = 0; i < so->cur_start; i++) { + if (so->start[i].gr->subc > -1) + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | (so->start[i].gr->subc << 13) + | so->start[i].mthd); + else + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | so->start[i].mthd); + for (nr = 0; nr < so->start[i].size; nr++, total++) + debug_printf("+0x%04x: 0x%08x\n", total, + so->pool[so->start[i].offset + nr]); + } +} + static INLINE struct nouveau_stateobj * -so_new(unsigned push, unsigned reloc) +so_new(unsigned start, unsigned push, unsigned reloc) { struct nouveau_stateobj *so; so = MALLOC(sizeof(struct nouveau_stateobj)); pipe_reference_init(&so->reference, 1); - so->push = MALLOC(sizeof(unsigned) * push); - so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); + so->total = so->cur = so->cur_start = so->cur_reloc = 0; - so->cur = so->push; - so->cur_reloc = so->cur_packet = 0; +#ifdef DEBUG_NOUVEAU_STATEOBJ + so->start_alloc = start; + so->reloc_alloc = reloc; + so->pool_alloc = push; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start)); + so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc)); + so->pool = MALLOC(push * sizeof(uint32_t)); + so->pool_cur = 0; + + if (!so->start || !so->reloc || !so->pool) { + debug_printf("malloc failed\n"); + assert(0); + } return so; } @@ -48,63 +102,128 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) struct nouveau_stateobj *so = *pso; int i; - if (pipe_reference(&(*pso)->reference, &ref->reference)) { - free(so->push); + if (pipe_reference(&(*pso)->reference, &ref->reference)) { + FREE(so->start); for (i = 0; i < so->cur_reloc; i++) nouveau_bo_ref(NULL, &so->reloc[i].bo); - free(so->reloc); - free(so); + FREE(so->reloc); + FREE(so->pool); + FREE(so); } *pso = ref; } static INLINE void -so_data(struct nouveau_stateobj *so, unsigned data) +so_data(struct nouveau_stateobj *so, uint32_t data) { - (*so->cur++) = (data); - so->cur_packet += 4; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur >= so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data; } static INLINE void -so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size) +so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size) { - so->cur_packet += (4 * size); +#ifdef DEBUG_NOUVEAU_STATEOBJ + if ((so->cur + size) > so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + while (size--) - (*so->cur++) = (*data++); + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = + *data++; } static INLINE void so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, unsigned mthd, unsigned size) { - so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4); - so_data(so, (gr->subc << 13) | (size << 18) | mthd); + struct nouveau_stateobj_start *start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start_alloc <= so->cur_start) { + debug_printf("exceeding num_start size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + start = so->start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) { + debug_printf("previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->start = start; + start[so->cur_start].gr = gr; + start[so->cur_start].mthd = mthd; + start[so->cur_start].size = size; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->pool_alloc < (size + so->pool_cur)) { + debug_printf("exceeding num_pool size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + start[so->cur_start].offset = so->pool_cur; + so->pool_cur += size; + + so->cur_start++; + /* The 1 is for *this* begin_ring. */ + so->total += so->cur + 1; + so->cur = 0; } static INLINE void so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo, unsigned data, unsigned flags, unsigned vor, unsigned tor) { - struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++]; - - r->bo = NULL; - nouveau_bo_ref(bo, &r->bo); - r->offset = so->cur - so->push; - r->packet = so->cur_packet; - r->data = data; - r->flags = flags; - r->vor = vor; - r->tor = tor; + struct nouveau_stateobj_reloc *r; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->reloc_alloc <= so->cur_reloc) { + debug_printf("exceeding num_reloc size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + r = so->reloc; + + so->reloc = r; + r[so->cur_reloc].bo = NULL; + nouveau_bo_ref(bo, &(r[so->cur_reloc].bo)); + r[so->cur_reloc].gr = so->start[so->cur_start-1].gr; + r[so->cur_reloc].push_offset = so->total + so->cur; + r[so->cur_reloc].data = data; + r[so->cur_reloc].flags = flags; + r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd + + (so->cur << 2); + r[so->cur_reloc].vor = vor; + r[so->cur_reloc].tor = tor; + so_data(so, data); + so->cur_reloc++; } -static INLINE void -so_dump(struct nouveau_stateobj *so) +/* Determine if this buffer object is referenced by this state object. */ +static INLINE boolean +so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo) { - unsigned i, nr = so->cur - so->push; + int i; + + for (i = 0; i < so->cur_reloc; i++) + if (so->reloc[i].bo == bo) + return true; - for (i = 0; i < nr; i++) - debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]); + return false; } static INLINE void @@ -114,75 +233,93 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so) unsigned nr, i; int ret = 0; - nr = so->cur - so->push; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start[so->cur_start - 1].size > so->cur) { + debug_printf("emit: previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* We cannot update total in case we so_emit again. */ + nr = so->total + so->cur; + /* This will flush if we need space. * We don't actually need the marker. */ if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) { debug_printf("so_emit failed marker emit with error %d\n", ret); - return; + assert(0); + } + + /* Submit data. This will ensure proper binding of objects. */ + for (i = 0; i < so->cur_start; i++) { + BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size); + OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size); } - pb->remaining -= nr; - memcpy(pb->cur, so->push, nr * 4); for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset, - r->bo, r->data, 0, r->flags, - r->vor, r->tor))) { + if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur - nr + + r->push_offset, r->bo, r->data, + 0, r->flags, r->vor, r->tor))) { debug_printf("so_emit failed reloc with error %d\n", ret); - goto out; + assert(0); } } -out: - pb->cur += nr; } static INLINE void so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so) { struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *gr = NULL; unsigned i; int ret = 0; if (!so) return; - i = so->cur_reloc << 1; - /* This will flush if we need space. - * We don't actually need the marker. - */ - if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) { - debug_printf("so_emit_reloc_markers failed marker emit with" \ - "error %d\n", ret); - return; - } - pb->remaining -= i; - + /* If we need to flush in flush notify, then we have a problem anyway. */ for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, - r->packet, 0, - (r->flags & (NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | - NOUVEAU_BO_RDWR)) | - NOUVEAU_BO_DUMMY, 0, 0))) { - debug_printf("so_emit_reloc_markers failed reloc" \ - "with error %d\n", ret); - pb->remaining += ((so->cur_reloc - i) << 1); - return; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (r->mthd & 0x40000000) { + debug_printf("error: NI mthd 0x%08X\n", r->mthd); + continue; } - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, - r->data, 0, - r->flags | NOUVEAU_BO_DUMMY, - r->vor, r->tor))) { - debug_printf("so_emit_reloc_markers failed reloc" \ - "with error %d\n", ret); - pb->remaining += ((so->cur_reloc - i) << 1) - 1; - return; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* The object needs to be bound and the system must know the + * subchannel is being used. Otherwise it will discard it. + */ + if (gr != r->gr) { + BEGIN_RING(chan, r->gr, 0x100, 1); + OUT_RING(chan, 0); + gr = r->gr; + } + + /* Some relocs really don't like to be hammered, + * NOUVEAU_BO_DUMMY makes sure it only + * happens when needed. + */ + ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) | + r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART + | NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); } + + ret = OUT_RELOC(chan, r->bo, r->data, r->flags | + NOUVEAU_BO_DUMMY, r->vor, r->tor); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); + } + + pb->remaining -= 2; } } diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 42c77e5e778..4c3e08a43f5 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -23,6 +23,9 @@ #define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17) #define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18) +/* use along with GPU_WRITE for 2D-only writes */ +#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19) + extern struct pipe_screen * nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *); diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 4b33636b2eb..edd96859cf8 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -10,10 +10,14 @@ nv04_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv04->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -30,32 +34,36 @@ nv04_destroy(struct pipe_context *pipe) static boolean nv04_init_hwctx(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + // requires a valid handle -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); // OUT_RING(0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1); - OUT_RING(0); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); + OUT_RING(chan, 0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(0x40182800); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, 0x40182800); // OUT_RING(1<<20/*no cull*/); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); // OUT_RING(0x24|(1<<6)|(1<<8)); - OUT_RING(0x120001a4); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1); - OUT_RING(0x332213a1); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1); - OUT_RING(0x11001010); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1); - OUT_RING(0x0); -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1); + OUT_RING(chan, 0x120001a4); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); + OUT_RING(chan, 0x332213a1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); + OUT_RING(chan, 0x11001010); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); + OUT_RING(chan, 0x0); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); // OUT_RING(SCREEN_OFFSET); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1); - OUT_RING(0xff000000); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); + OUT_RING(chan, 0xff000000); - FIRE_RING (NULL); + FIRE_RING (chan); return TRUE; } diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h index 55326c787a8..fe3b527423c 100644 --- a/src/gallium/drivers/nv04/nv04_context.h +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv04_screen *ctx = nv04->screen -#include "nouveau/nouveau_push.h" - #include "nv04_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -141,9 +137,9 @@ extern void nv04_emit_hw_state(struct nv04_context *nv04); extern void nv04_state_tex_update(struct nv04_context *nv04); /* nv04_vbo.c */ -extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv04_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv04_draw_elements( struct pipe_context *pipe, +extern void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c index 0cce71ad1de..c152b52119a 100644 --- a/src/gallium/drivers/nv04/nv04_fragtex.c +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -4,7 +4,7 @@ #define _(m,tf) \ { \ PIPE_FORMAT_##m, \ - NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ + NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ } struct nv04_texture_format { @@ -53,14 +53,14 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit) return; } - nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER + nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER + | NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER | nv04_fragtex_format(pt->format) - | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) - | ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) - | ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE + | ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) + | ( log2i(pt->width0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE ; } diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index f6458232ae5..0b795ea2430 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -93,33 +93,45 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render, static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RINGp(buffer + VERTEX_SIZE * v4,8); - OUT_RINGp(buffer + VERTEX_SIZE * v5,8); - OUT_RING(0xFEDCBA); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA), 49); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v4,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v5,8); + OUT_RING(chan, 0xFEDCBA); } static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RING(0xFED); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD), 25); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RING(chan, 0xFED); } static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RING(0xFECEDC); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC), 33); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RING(chan, 0xFECEDC); } static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices) @@ -156,7 +168,10 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con { const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; for(i = 0; i<nr_indices; i+=14) @@ -166,15 +181,15 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con if (numvert<3) break; - BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8); for(j = 0; j<numvert; j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices [i+j], 8 ); - BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 ); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); for(j = 0; j<numtri/2; j++ ) - OUT_RING(striptbl[j]); + OUT_RING(chan, striptbl[j]); if (numtri%2) - OUT_RING(striptbl[numtri/2]&0xFFF); + OUT_RING(chan, striptbl[numtri/2]&0xFFF); } } @@ -182,11 +197,14 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const { const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); - OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[0], 8); for(i = 1; i<nr_indices; i+=14) { @@ -195,16 +213,16 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const if (numvert < 3) break; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); for(j=0;j<numvert;j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[ i+j ], 8 ); - BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); for(j = 0; j<numtri/2; j++) - OUT_RING(fantbl[j]); + OUT_RING(chan, fantbl[j]); if (numtri%2) - OUT_RING(fantbl[numtri/2]&0xFFF); + OUT_RING(chan, fantbl[numtri/2]&0xFFF); } } diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 170ce3eb7e5..7c5b6e8229a 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -119,6 +119,8 @@ nv04_screen_destroy(struct pipe_screen *pscreen) nouveau_grobj_free(&screen->fahrenheit); nv04_surface_2d_takedown(&screen->eng2d); + nouveau_screen_fini(&screen->base); + FREE(pscreen); } @@ -163,10 +165,10 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) fahrenheit_class = 0; sub3d_class = 0; } else if (dev->chipset >= 0x10) { - fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE; + fahrenheit_class = NV10_TEXTURED_TRIANGLE; sub3d_class = NV10_CONTEXT_SURFACES_3D; } else { - fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE; + fahrenheit_class=NV04_TEXTURED_TRIANGLE; sub3d_class = NV04_CONTEXT_SURFACES_3D; } diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index 871034ad0b9..b67f1e16b1c 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -50,28 +50,28 @@ wrap_mode(unsigned wrap) { switch (wrap) { case PIPE_TEX_WRAP_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; break; case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; break; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; break; case PIPE_TEX_WRAP_CLAMP: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_CLAMP: default: NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; } - return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; + return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; } static void * @@ -84,20 +84,20 @@ nv04_sampler_state_create(struct pipe_context *pipe, ss = MALLOC(sizeof(struct nv04_sampler_state)); - ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | - (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); + ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | + (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); if (cso->max_anisotropy > 1.0) { - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; } switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; break; } @@ -105,14 +105,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, case PIPE_TEX_FILTER_LINEAR: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; break; } break; @@ -120,14 +120,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, default: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; break; } break; @@ -181,7 +181,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe, */ rs = MALLOC(sizeof(struct nv04_rasterizer_state)); - rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; + rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; return (void *)rs; } @@ -229,16 +229,16 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); hw->control = float_to_ubyte(cso->alpha.ref_value); - hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); - hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN; - hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0; - hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; - hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0; - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format + hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); + hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN; + hw->control |= cso->depth.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0; + hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; + hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0; + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format return (void *)hw; } @@ -377,7 +377,7 @@ nv04_set_scissor_state(struct pipe_context *pipe, /* struct nv04_context *nv04 = nv04_context(pipe); // XXX - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); OUT_RING (((s->maxx - s->minx) << 16) | s->minx); OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ } diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c index eb2c1c57c67..b8d6dc560f0 100644 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -57,13 +57,19 @@ static uint32_t nv04_blend_func(uint32_t f) static void nv04_emit_control(struct nv04_context* nv04) { uint32_t control = nv04->dsa->control; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, control); } static void nv04_emit_blend(struct nv04_context* nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; uint32_t blend; blend=0x4; // texture MODULATE_ALPHA @@ -75,19 +81,23 @@ static void nv04_emit_blend(struct nv04_context* nv04) blend|=(nv04_blend_func(nv04->blend->b_src)<<24); blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); - OUT_RING(blend); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); + OUT_RING(chan, blend); } static void nv04_emit_sampler(struct nv04_context *nv04, int unit) { struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; struct pipe_texture *pt = &nv04mt->base; - - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING(nv04->sampler[unit]->filter); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING(chan, nv04->sampler[unit]->filter); } static void nv04_state_emit_framebuffer(struct nv04_context* nv04) @@ -97,6 +107,10 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv04_miptree *nv04mt = 0; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; + struct nouveau_bo *bo; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -128,24 +142,29 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) assert(0); } - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); - OUT_RING(rt_format); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); + OUT_RING(chan, rt_format); nv04mt = (struct nv04_miptree *)rt->base.texture; + bo = nouveau_bo(nv04mt->buffer); /* FIXME pitches have to be aligned ! */ - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt->pitch|(zeta->pitch<<16)); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt->pitch|(zeta->pitch<<16)); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (fb->zsbuf) { nv04mt = (struct nv04_miptree *)zeta->base.texture; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } } void nv04_emit_hw_state(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; int i; if (nv04->dirty & NV04_NEW_VERTPROG) { @@ -163,8 +182,8 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (nv04->dirty & NV04_NEW_CONTROL) { nv04->dirty &= ~NV04_NEW_CONTROL; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(nv04->dsa->control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, nv04->dsa->control); } if (nv04->dirty & NV04_NEW_BLEND) { @@ -205,12 +224,12 @@ nv04_emit_hw_state(struct nv04_context *nv04) unsigned rt_pitch = ((struct nv04_surface *)nv04->rt)->pitch; unsigned zeta_pitch = ((struct nv04_surface *)nv04->zeta)->pitch; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt_pitch|(zeta_pitch<<16)); - OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt_pitch|(zeta_pitch<<16)); + OUT_RELOCl(chan, nouveau_bo(nv04->rt), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv04->zeta) { - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, nouveau_bo(nv04->zeta), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } /* Texture images */ @@ -218,9 +237,10 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (!(nv04->fp_samplers & (1 << i))) continue; struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); } } diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index 12df7fd1997..b24a9cee5ae 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -77,7 +77,7 @@ nv04_scaled_image_format(enum pipe_format format) } static INLINE unsigned -nv04_swizzle_bits(unsigned x, unsigned y) +nv04_swizzle_bits_square(unsigned x, unsigned y) { unsigned u = (x & 0x001) << 0 | (x & 0x002) << 1 | @@ -107,6 +107,15 @@ nv04_swizzle_bits(unsigned x, unsigned y) return v | u; } +/* rectangular swizzled textures are linear concatenations of swizzled square tiles */ +static INLINE unsigned +nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h) +{ + unsigned s = MIN2(w, h); + unsigned m = s - 1; + return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m); +} + static int nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, struct pipe_surface *dst, int dx, int dy, @@ -158,20 +167,19 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, for (x = 0; x < w; x += sub_w) { sub_w = MIN2(sub_w, w - x); - /* Must be 64-byte aligned */ - assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format)) & 63)); + assert(!(dst->offset & 63)); BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format), + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); OUT_RING (chan, nv04_scaled_image_format(src->format)); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 1 << 20); OUT_RING (chan, 1 << 20); @@ -491,3 +499,49 @@ nv04_surface_2d_init(struct nouveau_screen *screen) ctx->fill = nv04_surface_fill; return ctx; } + +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns) +{ + int temp_flags; + + // printf("creating temp, flags is %i!\n", flags); + + if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD) + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD; + } + else + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; + } + + struct nv40_screen* screen = (struct nv40_screen*)pscreen; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + + struct pipe_texture templ; + memset(&templ, 0, sizeof(templ)); + templ.format = ns->base.texture->format; + templ.target = PIPE_TEXTURE_2D; + templ.width0 = ns->base.width; + templ.height0 = ns->base.height; + templ.depth0 = 1; + templ.last_level = 0; + + // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented + templ.nr_samples = ns->base.texture->nr_samples; + + templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ); + struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags); + temp_ns->backing = ns; + + if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ) + eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height); + + return temp_ns; +} + diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h index 02b3f56ba8b..ce696a11a39 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.h +++ b/src/gallium/drivers/nv04/nv04_surface_2d.h @@ -4,6 +4,7 @@ struct nv04_surface { struct pipe_surface base; unsigned pitch; + struct nv04_surface* backing; }; struct nv04_surface_2d { @@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen); void nv04_surface_2d_takedown(struct nv04_surface_2d **); +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); + #endif diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index 8446073ae80..2dd2e146a8f 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -16,14 +16,14 @@ struct nv04_transfer { }; static void -nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv04_compatible_transfer_tex(pt, level, &tx_tex_template); + nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -130,9 +132,9 @@ nv04_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index e3167814f2b..34847718145 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv04_draw_elements( struct pipe_context *pipe, +void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -45,7 +45,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv04->constbuf[PIPE_SHADER_VERTEX], nv04->constbuf_nr[PIPE_SHADER_VERTEX]); @@ -65,15 +65,13 @@ boolean nv04_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv04_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv04_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { printf("coucou in draw arrays\n"); - return nv04_draw_elements(pipe, NULL, 0, prim, start, count); + nv04_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 0dadeb03dd3..1ecb73d06e8 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -10,10 +10,14 @@ nv10_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv10->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,225 +35,226 @@ static void nv10_init_hwctx(struct nv10_context *nv10) { struct nv10_screen *screen = nv10->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; float projectionmatrix[16]; - BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0x7ff<<16)|0x800); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); for (i=1;i<8;i++) { - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, 0x290, 1); - OUT_RING ((0x10<<16)|1); - BEGIN_RING(celsius, 0x3f4, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, 0x290, 1); + OUT_RING (chan, (0x10<<16)|1); + BEGIN_RING(chan, celsius, 0x3f4, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); if (nv10->screen->celsius->grclass != NV10TCL) { /* For nv11, nv17 */ - BEGIN_RING(celsius, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, celsius, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); /* Set state */ - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (0x207); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); - OUT_RING (0); - OUT_RING (0); - - BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12); - OUT_RING (0x30141010); - OUT_RING (0); - OUT_RING (0x20040000); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0x18000000); - OUT_RING (0x300e0300); - OUT_RING (0x0c091c80); - - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); - OUT_RING (1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4); - OUT_RING (1); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x8006); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8); - OUT_RING (0xff); - OUT_RING (0x207); - OUT_RING (0); - OUT_RING (0xff); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1d01); - BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (0x201); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (0x1b02); - OUT_RING (0x1b02); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (0x405); - OUT_RING (0x901); - BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + + BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(0), 12); + OUT_RING (chan, 0x30141010); + OUT_RING (chan, 0); + OUT_RING (chan, 0x20040000); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0x18000000); + OUT_RING (chan, 0x300e0300); + OUT_RING (chan, 0x0c091c80); + + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x8006); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1d01); + BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_CONTROL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, 0x201); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, 0x1b02); + OUT_RING (chan, 0x1b02); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, 0x405); + OUT_RING (chan, 0x901); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_S(0), 8); for (i=0;i<8;i++) { - OUT_RING (0); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RING (0x3fc00000); /* -1.50 */ - OUT_RING (0xbdb8aa0a); /* -0.09 */ - OUT_RING (0); /* 0.00 */ + BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RING (chan, 0x3fc00000); /* -1.50 */ + OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */ + OUT_RING (chan, 0); /* 0.00 */ - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2); - OUT_RING (0x802); - OUT_RING (2); + BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2); + OUT_RING (chan, 0x802); + OUT_RING (chan, 2); /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when * using texturing, except when using the texture matrix */ - BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); - OUT_RING (6); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (0x01010101); + BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); + OUT_RING (chan, 6); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x01010101); /* Set vertex component */ - BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1); - OUT_RINGf (0.0); - BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX1_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1); + OUT_RINGf (chan, 0.0); + BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); memset(projectionmatrix, 0, sizeof(projectionmatrix)); - BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, celsius, NV10TCL_PROJECTION_MATRIX(0), 16); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 1.0; projectionmatrix[3*4+3] = 1.0; for (i=0;i<16;i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); - OUT_RING (0.0); - OUT_RINGf (16777216.0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); + OUT_RING (chan, 0.0); + OUT_RINGf (chan, 16777216.0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (-2048.0); - OUT_RINGf (-2048.0); - OUT_RINGf (16777215.0 * 0.5); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RING (chan, 0); - FIRE_RING (NULL); + FIRE_RING (chan); } struct pipe_context * diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 36a6aa7a74e..ab4b825487d 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv10_screen *ctx = nv10->screen -#include "nouveau/nouveau_push.h" - #include "nv10_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -144,9 +140,9 @@ extern void nv10_emit_hw_state(struct nv10_context *nv10); extern void nv10_state_tex_update(struct nv10_context *nv10); /* nv10_vbo.c */ -extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv10_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv10_draw_elements( struct pipe_context *pipe, +extern void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c index 906fdfeeb93..c1f7ccb9ab6 100644 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -52,6 +52,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; struct pipe_texture *pt = &nv10mt->base; struct nv10_texture_format *tf; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; uint32_t txf, txs, txp; tf = nv10_fragtex_format(pt->format); @@ -82,15 +85,15 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) return; } - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width0 << 16) | pt->height0); - OUT_RING (ps->bcol); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv10mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv10mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv10_fragtex_bind(struct nv10_context *nv10) { #if 0 struct nv10_fragment_program *fp = nv10->fragprog.active; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; unsigned samplers, unit; samplers = nv10->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv10_fragtex_bind(struct nv10_context *nv10) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv10->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 7ba9777a222..c5dbe43dbc8 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -67,12 +67,15 @@ struct nv10_vbuf_render { void nv10_vtxbuf_bind( struct nv10_context* nv10 ) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; for(i = 0; i < 8; i++) { - BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv10->vtxbuf*/); - BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv10->vtxbuf*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(i), 1); + OUT_RING(chan, 0/*XXX*/); } } @@ -163,19 +166,22 @@ nv10_vbuf_render_draw( struct vbuf_render *render, { struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); struct nv10_context *nv10 = nv10_render->nv10; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int push, i; nv10_emit_hw_state(nv10); - BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv10_render->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv10_render->hwprim); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv10_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, celsius, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -183,16 +189,16 @@ nv10_vbuf_render_draw( struct vbuf_render *render, // XXX too big/small ? check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index ee5901e743e..69a6dab866a 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -115,6 +115,9 @@ nv10_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->celsius); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } @@ -177,7 +180,6 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->celsius, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 2577ab73b56..30a596ca604 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -4,25 +4,32 @@ static void nv10_state_emit_blend(struct nv10_context* nv10) { struct nv10_blend_state *b = nv10->blend; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); - OUT_RING (b->b_enable); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (chan, b->b_enable); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv10_state_emit_blend_color(struct nv10_context* nv10) { struct pipe_blend_color *c = nv10->blend_color; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -31,60 +38,66 @@ static void nv10_state_emit_blend_color(struct nv10_context* nv10) static void nv10_state_emit_rast(struct nv10_context* nv10) { struct nv10_rasterizer_state *r = nv10->rast; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, celsius, NV10TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv10_state_emit_dsa(struct nv10_context* nv10) { struct nv10_depth_stencil_alpha_state *d = nv10->dsa; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); #if 0 - BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (d->alpha.func); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv10_state_emit_viewport(struct nv10_context* nv10) @@ -108,6 +121,10 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) int colour_format = 0, zeta_format = 0; struct nv10_miptree *nv10mt = 0; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; colour_format = fb->cbufs[0]->format; @@ -144,11 +161,11 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) } if (zeta) { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv10mt = (struct nv10_miptree *)rt->base.texture; @@ -160,13 +177,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) nv10->zeta = nv10mt->buffer; } - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - OUT_RING (rt_format); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0 | 0x08000800); - OUT_RING (((h - 1) << 16) | 0 | 0x08000800); + BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); + OUT_RING (chan, rt_format); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0 | 0x08000800); + OUT_RING (chan, ((h - 1) << 16) | 0 | 0x08000800); } static void nv10_vertex_layout(struct nv10_context *nv10) @@ -201,6 +218,10 @@ static void nv10_vertex_layout(struct nv10_context *nv10) void nv10_emit_hw_state(struct nv10_context *nv10) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + struct nouveau_bo *rt_bo; int i; if (nv10->dirty & NV10_NEW_VERTPROG) { @@ -269,38 +290,41 @@ nv10_emit_hw_state(struct nv10_context *nv10) */ /* Render target */ + rt_bo = nouveau_bo(nv10->rt[0]); // XXX figre out who's who for NV10TCL_DMA_* and fill accordingly -// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1); -// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_COLOR0, 1); +// OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv10->zeta) { + struct nouveau_bo *zeta_bo = nouveau_bo(nv10->zeta); // XXX -// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1); -// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_ZETA, 1); +// OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv10->zeta + lma_offset);*/ +/* BEGIN_RING(chan, celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv10->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv10->fp_samplers & (1 << i))) continue; - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv10->tex[i].buffer); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, + BEGIN_RING(chan, celsius, NV10TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv10->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, NV10TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index c664973e904..eb04af9782e 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -16,14 +16,14 @@ struct nv10_transfer { }; static void -nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv10_compatible_transfer_tex(pt, level, &tx_tex_template); + nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -130,9 +132,9 @@ nv10_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 441a4f75f3f..9180c72c9b0 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv10_draw_elements( struct pipe_context *pipe, +void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -45,6 +45,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, } draw_set_mapped_constant_buffer(draw, + PIPE_SHADER_VERTEX, nv10->constbuf[PIPE_SHADER_VERTEX], nv10->constbuf_nr[PIPE_SHADER_VERTEX]); @@ -64,14 +65,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv10_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv10_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { - return nv10_draw_elements(pipe, NULL, 0, prim, start, count); + nv10_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index 6a147a4159c..5b80af2d22a 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -10,10 +10,14 @@ nv20_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv20_context *nv20 = nv20_context(pipe); + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv20->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,348 +35,352 @@ static void nv20_init_hwctx(struct nv20_context *nv20) { struct nv20_screen *screen = nv20->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; float projectionmatrix[16]; - const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL); + const boolean is_nv25tcl = (kelvin->grclass == NV25TCL); - BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); /* TEXTURE1 */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); /* ZETA */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); /* TEXTURE1 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); /* ZETA */ - BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1); - OUT_RING (0); /* renouveau: beef0351, unique */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1); + OUT_RING (chan, 0); /* renouveau: beef0351, unique */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0xfff << 16) | 0x0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x17e0, 3); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); + BEGIN_RING(chan, kelvin, 0x17e0, 3); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); } else { - BEGIN_RING(kelvin, 0x1e68, 1); - OUT_RING (0x4b800000); /* 16777216.000000 */ - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL); + BEGIN_RING(chan, kelvin, 0x1e68, 1); + OUT_RING (chan, 0x4b800000); /* 16777216.000000 */ + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL); } - BEGIN_RING(kelvin, 0x290, 1); - OUT_RING ((0x10 << 16) | 1); - BEGIN_RING(kelvin, 0x9fc, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x1d80, 1); - OUT_RING (1); - BEGIN_RING(kelvin, 0x9f8, 1); - OUT_RING (4); - BEGIN_RING(kelvin, 0x17ec, 3); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, 0x290, 1); + OUT_RING (chan, (0x10 << 16) | 1); + BEGIN_RING(chan, kelvin, 0x9fc, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x1d80, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, 0x9f8, 1); + OUT_RING (chan, 4); + BEGIN_RING(chan, kelvin, 0x17ec, 3); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 0.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d88, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d88, 1); + OUT_RING (chan, 3); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1); - OUT_RING (chan->vram->handle); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1); + OUT_RING (chan, chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1); + OUT_RING (chan, chan->vram->handle); } - BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1); - OUT_RING (0); /* renouveau: beef1e10 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1); + OUT_RING (chan, 0); /* renouveau: beef1e10 */ - BEGIN_RING(kelvin, 0x1e98, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1e98, 1); + OUT_RING (chan, 0); #if 0 if (is_nv25tcl) { - BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ - OUT_RING (NvDmaFB); /* renouveau: beef0201 */ + BEGIN_RING(chan, NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ + OUT_RING (chan, NvDmaFB); /* renouveau: beef0201 */ - BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ + BEGIN_RING(chan, NvSub3D, NV20TCL_DMA_TEXTURE1, 1); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ } #endif - BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, kelvin, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); /* error: ILLEGAL_MTHD, PROTECTION_FAULT - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); - OUT_RINGf (512.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 512.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x022c, 2); - OUT_RING (0x280); - OUT_RING (0x07d28000); + BEGIN_RING(chan, kelvin, 0x022c, 2); + OUT_RING (chan, 0x280); + OUT_RING (chan, 0x07d28000); } /* * illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c2c, 1); - OUT_RING (0); */ + BEGIN_RING(chan, NvSub3D, 0x1c2c, 1); + OUT_RING (chan, 0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1da4, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1da4, 1); + OUT_RING (chan, 0); } /* * crashes with illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c18, 1); - OUT_RING (0x200); */ + BEGIN_RING(chan, NvSub3D, 0x1c18, 1); + OUT_RING (chan, 0x200); */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING ((0 << 16) | 0); - OUT_RING ((0 << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, (0 << 16) | 0); + OUT_RING (chan, (0 << 16) | 0); /* *** Set state *** */ - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */ + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_ALPHA_FUNC_REF */ for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4); - OUT_RING (0x30d410d0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1); - OUT_RING (0x00011101); - BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2); - OUT_RING (0x130e0300); - OUT_RING (0x0c091c80); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4); - OUT_RING (0x20c400c0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); - OUT_RING (0x035125a0); - OUT_RING (0); - OUT_RING (0x40002000); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); - OUT_RING (0xffff0000); - - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4); - OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE); - OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO); - OUT_RING (0); /* NV20TCL_BLEND_COLOR */ - OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RING (0xff); - OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */ - OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */ - OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP); - - BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (0); - OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY); - BEGIN_RING(kelvin, 0x17cc, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4); + OUT_RING (chan, 0x30d410d0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1); + OUT_RING (chan, 0x00011101); + BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2); + OUT_RING (chan, 0x130e0300); + OUT_RING (chan, 0x0c091c80); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4); + OUT_RING (chan, 0x20c400c0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); + OUT_RING (chan, 0x035125a0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x40002000); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); + OUT_RING (chan, 0xffff0000); + + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, NV20TCL_BLEND_FUNC_SRC_ONE); + OUT_RING (chan, NV20TCL_BLEND_FUNC_DST_ZERO); + OUT_RING (chan, 0); /* NV20TCL_BLEND_COLOR */ + OUT_RING (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RING (chan, 0xff); + OUT_RING (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_STENCIL_FUNC_REF */ + OUT_RING (chan, 0xff); /* NV20TCL_STENCIL_FUNC_MASK */ + OUT_RING (chan, NV20TCL_STENCIL_OP_FAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP); + + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY); + BEGIN_RING(chan, kelvin, 0x17cc, 1); + OUT_RING (chan, 0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 1); } - BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1); - OUT_RING (0x00020000); - BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), + BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_CONTROL, 1); + OUT_RING (chan, 0x00020000); + BEGIN_RING(chan, kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE); for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) { - OUT_RING(0xffffffff); + OUT_RING(chan, 0xffffffff); } - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (NV20TCL_DEPTH_FUNC_LESS); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RINGf (0.0); - OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, NV20TCL_DEPTH_FUNC_LESS); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); if (!is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 3); } - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); if (!is_nv25tcl) { - OUT_RING (8); + OUT_RING (chan, 8); } else { - OUT_RINGf (1.0); + OUT_RINGf (chan, 1.0); } if (!is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POINT_SMOOTH_ENABLE */ } else { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x0a1c, 1); - OUT_RING (0x800); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x0a1c, 1); + OUT_RING (chan, 0x800); } - BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL); - OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (NV20TCL_CULL_FACE_BACK); - OUT_RING (NV20TCL_FRONT_FACE_CCW); - BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1); - OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, NV20TCL_POLYGON_MODE_FRONT_FILL); + OUT_RING (chan, NV20TCL_POLYGON_MODE_BACK_FILL); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, NV20TCL_CULL_FACE_BACK); + OUT_RING (chan, NV20TCL_FRONT_FACE_CCW); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1); + OUT_RING (chan, NV20TCL_SHADE_MODEL_SMOOTH); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) { - OUT_RING(0); + OUT_RING(chan, 0); } - BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RINGf (1.5); - OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ - OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ - BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2); - OUT_RING (NV20TCL_FOG_MODE_EXP_2); - OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG); - BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.FOG_COLOR */ - BEGIN_RING(kelvin, NV20TCL_ENGINE, 1); - OUT_RING (NV20TCL_ENGINE_FIXED); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RINGf (chan, 1.5); + OUT_RINGf (chan, -0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ + OUT_RINGf (chan, 0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ + BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2); + OUT_RING (chan, NV20TCL_FOG_MODE_EXP_SIGNED); + OUT_RING (chan, NV20TCL_FOG_COORD_FOG); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.FOG_COLOR */ + BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1); + OUT_RING (chan, NV20TCL_ENGINE_FIXED); for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); - OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0); - OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); + BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); for (i = 4; i < 16; ++i) { - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 1.0); } - BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (0x00010101); - BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x00010101); + BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1); + OUT_RING (chan, 0); memset(projectionmatrix, 0, sizeof(projectionmatrix)); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 16777215.0; projectionmatrix[3*4+3] = 1.0; - BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); for (i = 0; i < 16; i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); - OUT_RINGf (0.0); - OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */ - OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */ - OUT_RINGf (0.0); - OUT_RINGf (16777215.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); /* x-offset, w/2 + 1.031250 */ + OUT_RINGf (chan, 0.0); /* y-offset, h/2 + 0.030762 */ + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777215.0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); - OUT_RINGf (0.0); /* no effect?, w/2 */ - OUT_RINGf (0.0); /* no effect?, h/2 */ - OUT_RINGf (16777215.0 * 0.5); - OUT_RINGf (65535.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (chan, 0.0); /* no effect?, w/2 */ + OUT_RINGf (chan, 0.0); /* no effect?, h/2 */ + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RINGf (chan, 65535.0); - FIRE_RING (NULL); + FIRE_RING (chan); } struct pipe_context * diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h index a4eaa956608..c7dfadaa311 100644 --- a/src/gallium/drivers/nv20/nv20_context.h +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv20_screen *ctx = nv20->screen -#include "nouveau/nouveau_push.h" - #include "nv20_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -143,9 +139,9 @@ extern void nv20_emit_hw_state(struct nv20_context *nv20); extern void nv20_state_tex_update(struct nv20_context *nv20); /* nv20_vbo.c */ -extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv20_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv20_draw_elements( struct pipe_context *pipe, +extern void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c index 2db4a4015a6..dedbec73f39 100644 --- a/src/gallium/drivers/nv20/nv20_fragtex.c +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -52,6 +52,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) struct nv20_miptree *nv20mt = nv20->tex_miptree[unit]; struct pipe_texture *pt = &nv20mt->base; struct nv20_texture_format *tf; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t txf, txs, txp; tf = nv20_fragtex_format(pt->format); @@ -82,15 +85,15 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) return; } - BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width0 << 16) | pt->height0); - OUT_RING (ps->bcol); + BEGIN_RING(chan, kelvin, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv20mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv20mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv20_fragtex_bind(struct nv20_context *nv20) { #if 0 struct nv20_fragment_program *fp = nv20->fragprog.active; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; unsigned samplers, unit; samplers = nv20->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv20_fragtex_bind(struct nv20_context *nv20) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv20->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index d1291a92e0a..8f7538e7f57 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -6,6 +6,7 @@ #include "nv20_context.h" #include "nv20_screen.h" +#include "../nv04/nv04_surface_2d.h" static void nv20_miptree_layout(struct nv20_miptree *nv20mt) @@ -127,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv20_miptree_layout(mt); mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size); @@ -183,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, ns->base.offset = nv20mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base; + return &ns->base; } static void nv20_miptree_surface_destroy(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv20_miptree_surface_destroy(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c index ddfcdb8057a..2e145672da1 100644 --- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -81,12 +81,15 @@ nv20_vbuf_render(struct vbuf_render *render) void nv20_vtxbuf_bind( struct nv20_context* nv20 ) { #if 0 + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv20->vtxbuf*/); - BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv20->vtxbuf*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i) ,1); + OUT_RING(chan, 0/*XXX*/); } #endif } @@ -202,6 +205,9 @@ nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type) static unsigned nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) { + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t hwfmt = 0; unsigned fields; @@ -231,8 +237,8 @@ nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) return 0; } - BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1); - OUT_RING(hwfmt); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(hwattr), 1); + OUT_RING(chan, hwfmt); return fields; } @@ -262,6 +268,9 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; struct vertex_info *vinfo = &nv20->vertex_info; unsigned nr_fields; int max_push; @@ -270,29 +279,29 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, nr_fields = nv20__emit_vertex_array_format(nv20); - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); max_push = 1200 / nr_fields; while (nr_indices) { int i; int push = MIN2(nr_indices, max_push); - BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); + BEGIN_RING_NI(chan, kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); for (i = 0; i < push; i++) { /* XXX: fixme to handle other than floats? */ int f = nr_fields; float *attrv = (float*)&data[indices[i] * vsz]; while (f-- > 0) - OUT_RINGf(*attrv++); + OUT_RINGf(chan, *attrv++); } nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, NV20TCL_VERTEX_BEGIN_END_STOP); } static void @@ -301,20 +310,23 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int push, i; NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n"); - BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv20_render->pbuffer, 0, + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv20_render->pbuffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, kelvin, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -322,16 +334,16 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, // XXX too big/small ? check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } static void diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index 4eeacd1afd5..d091335063b 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -115,6 +115,9 @@ nv20_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->kelvin); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } @@ -173,7 +176,6 @@ nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->kelvin, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index 0122b1c2cdb..6bbd1fdae9d 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -5,27 +5,34 @@ static void nv20_state_emit_blend(struct nv20_context* nv20) { struct nv20_blend_state *b = nv20->blend; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (b->b_enable); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, b->b_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 2); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv20_state_emit_blend_color(struct nv20_context* nv20) { struct pipe_blend_color *c = nv20->blend_color; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -34,63 +41,69 @@ static void nv20_state_emit_blend_color(struct nv20_context* nv20) static void nv20_state_emit_rast(struct nv20_context* nv20) { struct nv20_rasterizer_state *r = nv20->rast; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); - BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv20_state_emit_dsa(struct nv20_context* nv20) { struct nv20_depth_stencil_alpha_state *d = nv20->dsa; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); #if 0 - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (d->alpha.func); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv20_state_emit_viewport(struct nv20_context* nv20) @@ -101,9 +114,13 @@ static void nv20_state_emit_scissor(struct nv20_context* nv20) { /* NV20TCL_SCISSOR_* is probably a software method */ /* struct pipe_scissor_state *s = nv20->scissor; - BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + + BEGIN_RING(chan, kelvin, NV20TCL_SCISSOR_HORIZ, 2); + OUT_RING (chan, ((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (chan, ((s->maxy - s->miny) << 16) | s->miny);*/ } static void nv20_state_emit_framebuffer(struct nv20_context* nv20) @@ -113,6 +130,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv20_miptree *nv20mt = 0; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -150,11 +170,11 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) } if (zeta) { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv20mt = (struct nv20_miptree *)rt->base.texture; @@ -166,13 +186,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) nv20->zeta = nv20mt->buffer; } - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); /*NV20TCL_RT_VERT */ - OUT_RING (rt_format); /* NV20TCL_RT_FORMAT */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0); - OUT_RING (((h - 1) << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); /*NV20TCL_RT_VERT */ + OUT_RING (chan, rt_format); /* NV20TCL_RT_FORMAT */ + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0); + OUT_RING (chan, ((h - 1) << 16) | 0); } static void nv20_vertex_layout(struct nv20_context *nv20) @@ -228,7 +248,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) } /* always do position */ { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->hwfmt[0] |= (1 << 0); } @@ -237,19 +257,19 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 4; i < 6; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 3)); } if (colors[0]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 3); } if (colors[1]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 4); } @@ -258,7 +278,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 6; i < 10; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 1)); } @@ -267,7 +287,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 0; i < 4; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 9)); } @@ -276,13 +296,13 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 10; i < 12; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 3)); } if (fog) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << 15); } @@ -293,6 +313,10 @@ static void nv20_vertex_layout(struct nv20_context *nv20) void nv20_emit_hw_state(struct nv20_context *nv20) { + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + struct nouveau_bo *rt_bo; int i; if (nv20->dirty & NV20_NEW_VERTPROG) { @@ -361,36 +385,39 @@ nv20_emit_hw_state(struct nv20_context *nv20) */ /* Render target */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + rt_bo = nouveau_bo(nv20->rt[0]); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv20->zeta) { - BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1); - OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + struct nouveau_bo *zeta_bo = nouveau_bo(nv20->zeta); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_ZETA, 1); + OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv20->zeta + lma_offset);*/ +/* BEGIN_RING(chan, kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv20->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv20->fp_samplers & (1 << i))) continue; - BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv20->tex[i].buffer); + BEGIN_RING(chan, kelvin, NV20TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format, + BEGIN_RING(chan, kelvin, NV20TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv20->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0, NV20TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 69b79c809f4..699773e8e6f 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -16,14 +16,14 @@ struct nv20_transfer { }; static void -nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv20_compatible_transfer_tex(pt, level, &tx_tex_template); + nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv20_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c index 84d7db6c5e2..52991a0d856 100644 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv20_draw_elements( struct pipe_context *pipe, +void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -45,7 +45,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv20->constbuf[PIPE_SHADER_VERTEX], nv20->constbuf_nr[PIPE_SHADER_VERTEX]); @@ -67,13 +67,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe, } draw_flush(nv20->draw); - return TRUE; } -boolean nv20_draw_arrays( struct pipe_context *pipe, +void nv20_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return nv20_draw_elements(pipe, NULL, 0, prim, start, count); + nv20_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 46a821a48b1..54572e9ab3a 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -10,21 +10,32 @@ nv30_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void nv30_destroy(struct pipe_context *pipe) { struct nv30_context *nv30 = nv30_context(pipe); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (nv30->state.hw[i]) + so_ref(NULL, &nv30->state.hw[i]); + } if (nv30->draw) draw_destroy(nv30->draw); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 864ddaeb598..e59449287b5 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv30_screen *ctx = nv30->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include "nv30_state.h" @@ -198,9 +194,9 @@ extern struct nv30_state_entry nv30_state_fragtex; extern struct nv30_state_entry nv30_state_vbo; /* nv30_vbo.c */ -extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv30_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv30_draw_elements(struct pipe_context *pipe, +extern void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 40965a97723..2d565cb631b 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -435,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv30_sr(NV30SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV30_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV30_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -517,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SIN: @@ -821,7 +837,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv30_fragprog_upload(nv30, fp); - so = so_new(8, 1); + so = so_new(4, 4, 1); so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | @@ -870,6 +886,12 @@ void nv30_fragprog_destroy(struct nv30_context *nv30, struct nv30_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index b3293ee700d..98935678911 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -106,7 +106,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(1, 8, 2); so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -135,7 +135,7 @@ nv30_fragtex_validate(struct nv30_context *nv30) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index ce95d9700f6..8fbba38e78f 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -5,6 +5,7 @@ #include "util/u_math.h" #include "nv30_context.h" +#include "../nv04/nv04_surface_2d.h" static void nv30_miptree_layout(struct nv30_miptree *nv30mt) @@ -108,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv30_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, @@ -196,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = nv30mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv30_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv30_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c index 1d1c8a484e1..e27e9ccbf60 100644 --- a/src/gallium/drivers/nv30/nv30_query.c +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -41,6 +41,9 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_query *q = nv30_query(pq); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(0); nouveau_notifier_reset(nv30->screen->query, q->object->start); - BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -69,12 +72,15 @@ static void nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_query *q = nv30_query(pq); - BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 7cd36902eb4..9ed48178dc2 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -156,6 +156,12 @@ static void nv30_screen_destroy(struct pipe_screen *pscreen) { struct nv30_screen *screen = nv30_screen(pscreen); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -163,6 +169,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->rankine); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } @@ -224,7 +233,6 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->rankine, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -261,7 +269,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static rankine initialisation */ - so = so_new(128, 0); + so = so_new(36, 60, 0); so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 268d3a8ab85..065c927a10d 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -14,7 +14,7 @@ nv30_blend_state_create(struct pipe_context *pipe, struct nv30_context *nv30 = nv30_context(pipe); struct nouveau_grobj *rankine = nv30->screen->rankine; struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); @@ -300,7 +300,7 @@ nv30_rasterizer_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(9, 19, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; /*XXX: ignored: @@ -435,7 +435,7 @@ nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(5, 21, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c index 64cf9ae93a0..c36d58c040c 100644 --- a/src/gallium/drivers/nv30/nv30_state_blend.c +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -18,7 +18,7 @@ struct nv30_state_entry nv30_state_blend = { static boolean nv30_state_blend_colour_validate(struct nv30_context *nv30) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv30->blend_colour; so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 6f6d1740d6e..2ed2ea55e84 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -10,7 +10,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) struct nv04_surface *rt[2], *zeta = NULL; uint32_t rt_enable = 0, rt_format = 0; int i, colour_format = 0, zeta_format = 0, depth_only = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(12, 18, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c index 3ac7a8471ea..ba61a9e24a4 100644 --- a/src/gallium/drivers/nv30/nv30_state_scissor.c +++ b/src/gallium/drivers/nv30/nv30_state_scissor.c @@ -12,7 +12,7 @@ nv30_state_scissor_validate(struct nv30_context *nv30) return FALSE; nv30->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); if (nv30->state.scissor_enabled) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c index d0c791ac082..ed520a4f439 100644 --- a/src/gallium/drivers/nv30/nv30_state_stipple.c +++ b/src/gallium/drivers/nv30/nv30_state_stipple.c @@ -14,14 +14,14 @@ nv30_state_stipple_validate(struct nv30_context *nv30) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv30->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c index c3eb413dac6..2d7781292bd 100644 --- a/src/gallium/drivers/nv30/nv30_state_viewport.c +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -19,7 +19,7 @@ nv30_state_viewport_validate(struct nv30_context *nv30) return FALSE; nv30->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(3, 10, 0); if (!bypass) { so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 2255a02caed..65598991c68 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -16,14 +16,14 @@ struct nv30_transfer { }; static void -nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv30_compatible_transfer_tex(pt, level, &tx_tex_template); + nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv30_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index e32b8141af8..1c5db03ea24 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -163,19 +163,21 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; nv30_vbo_set_idxbuf(nv30, NULL, 0); if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } while (count) { @@ -186,17 +188,17 @@ nv30_draw_arrays(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,15 +208,15 @@ nv30_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; @@ -228,7 +230,9 @@ static INLINE void nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv30_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv30_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void nv30_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; while (count) { @@ -412,17 +421,17 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -461,7 +468,7 @@ nv30_draw_elements(struct pipe_context *pipe, if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } if (idxbuf) { @@ -472,7 +479,6 @@ nv30_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -485,9 +491,9 @@ nv30_vbo_validate(struct nv30_context *nv30) unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); for (hw = 0; hw < nv30->vtxelt_nr; hw++) { @@ -500,7 +506,7 @@ nv30_vbo_validate(struct nv30_context *nv30) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 5d609846229..e77a5be3f23 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -650,7 +650,9 @@ static boolean nv30_vertprog_validate(struct nv30_context *nv30) { struct pipe_screen *pscreen = nv30->pipe.screen; - struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -684,7 +686,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) assert(0); } - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_ref(so, &vp->so); @@ -770,9 +772,9 @@ nv30_vertprog_validate(struct nv30_context *nv30) 4 * sizeof(float)); } - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -788,11 +790,11 @@ nv30_vertprog_validate(struct nv30_context *nv30) vp->insns[i].data[2], vp->insns[i].data[3]); } #endif - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index eb9cce4c786..f79ae4db84e 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -10,21 +10,32 @@ nv40_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void nv40_destroy(struct pipe_context *pipe) { struct nv40_context *nv40 = nv40_context(pipe); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (nv40->state.hw[i]) + so_ref(NULL, &nv40->state.hw[i]); + } if (nv40->draw) draw_destroy(nv40->draw); diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 83fcf1785d9..e219bb537ac 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv40_screen *ctx = nv40->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include "nv40_state.h" @@ -183,7 +179,7 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); /* nv40_draw.c */ extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); -extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, +extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned ib_size, unsigned mode, unsigned start, unsigned count); @@ -219,9 +215,9 @@ extern struct nv40_state_entry nv40_state_vbo; extern struct nv40_state_entry nv40_state_vtxfmt; /* nv40_vbo.c */ -extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv40_draw_elements(struct pipe_context *pipe, +extern void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index b2f19ecb699..d826f8c2f5f 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -31,6 +31,9 @@ nv40_render_stage(struct draw_stage *stage) static INLINE void nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) { + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned i; for (i = 0; i < nv40->swtnl.nr_attribs; i++) { @@ -41,30 +44,30 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) case EMIT_OMIT: break; case EMIT_1F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (fui(v->data[idx][0])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (chan, fui(v->data[idx][0])); break; case EMIT_2F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); break; case EMIT_3F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); break; case EMIT_4F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); - OUT_RING (fui(v->data[idx][3])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + OUT_RING (chan, fui(v->data[idx][3])); break; case EMIT_4UB: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), float_to_ubyte(v->data[idx][1]), float_to_ubyte(v->data[idx][2]), float_to_ubyte(v->data[idx][3]))); @@ -82,7 +85,11 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, { struct nv40_render_stage *rs = nv40_render_stage(stage); struct nv40_context *nv40 = rs->nv40; - struct nouveau_pushbuf *pb = nv40->screen->base.channel->pushbuf; + + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *curie = screen->curie; unsigned i; /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ @@ -91,19 +98,19 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, NOUVEAU_ERR("AIII, missed flush\n"); assert(0); } - FIRE_RING(NULL); + FIRE_RING(chan); nv40_state_emit(nv40); } /* Switch primitive modes if necessary */ if (rs->prim != mode) { if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (mode); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, mode); rs->prim = mode; } @@ -115,8 +122,8 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, * off the primitive now. */ if (pb->remaining < ((count * 20) + 6)) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -144,10 +151,13 @@ nv40_render_flush(struct draw_stage *draw, unsigned flags) { struct nv40_render_stage *rs = nv40_render_stage(draw); struct nv40_context *nv40 = rs->nv40; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -226,7 +236,7 @@ nv40_draw_render_stage(struct nv40_context *nv40) return &render->stage; } -boolean +void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned idxbuf_size, unsigned mode, unsigned start, unsigned count) @@ -237,7 +247,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, void *map; if (!nv40_state_validate_swtnl(nv40)) - return FALSE; + return; nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); nv40_state_emit(nv40); @@ -261,7 +271,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, map = pipe_buffer_map(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nv40->draw, map, nr); + draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, + map, nr); } draw_arrays(nv40->draw, mode, start, count); @@ -277,15 +288,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, draw_flush(nv40->draw); pipe->flush(pipe, 0, NULL); - - return TRUE; } static INLINE void emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, unsigned semantic, unsigned index) { - unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); + unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index); unsigned a = nv40->swtnl.nr_attribs++; nv40->swtnl.hw[a] = hw; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 1bf16726d10..1237066c398 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -149,7 +149,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); break; case NV40SR_NONE: sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); @@ -445,10 +445,11 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv40_sr(NV40SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV40_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -573,13 +574,28 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, neg(swz(tmp, X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SEQ: @@ -752,7 +768,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) { struct tgsi_full_immediate *imm; float vals[4]; - + imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); @@ -836,7 +852,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, fp->insn[fpc->inst_offset + 1] = 0x00000000; fp->insn[fpc->inst_offset + 2] = 0x00000000; fp->insn[fpc->inst_offset + 3] = 0x00000000; - + fp->translated = TRUE; out_err: tgsi_parse_free(&parse); @@ -903,7 +919,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv40_fragprog_upload(nv40, fp); - so = so_new(4, 1); + so = so_new(2, 2, 1); so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | @@ -917,7 +933,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) update_constants: if (fp->nr_consts) { float *map; - + map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { @@ -948,6 +964,12 @@ void nv40_fragprog_destroy(struct nv40_context *nv40, struct nv40_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 44abc845969..aad9198210f 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -108,7 +108,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(2, 9, 2); so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -139,7 +139,7 @@ nv40_fragtex_validate(struct nv40_context *nv40) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index b974e68a077..89bd155ff49 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -5,6 +5,7 @@ #include "util/u_math.h" #include "nv40_context.h" +#include "../nv04/nv04_surface_2d.h" @@ -105,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv40_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); @@ -191,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv40_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv40_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 7874aedd428..8ed4a67dd03 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -41,6 +41,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(0); nouveau_notifier_reset(nv40->screen->query, q->object->start); - BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -70,11 +73,14 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; - BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index bd13dfddd1c..9e55e5a089c 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -140,6 +140,12 @@ static void nv40_screen_destroy(struct pipe_screen *pscreen) { struct nv40_screen *screen = nv40_screen(pscreen); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -147,6 +153,7 @@ nv40_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->curie); + nv04_surface_2d_takedown(&screen->eng2d); nouveau_screen_fini(&screen->base); @@ -208,7 +215,6 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->curie, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -245,7 +251,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static curie initialisation */ - so = so_new(128, 0); + so = so_new(16, 25, 0); so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index f6e5dee8145..7d990f7d567 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -16,7 +16,7 @@ nv40_blend_state_create(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); struct nouveau_grobj *curie = nv40->screen->curie; struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); @@ -310,7 +310,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(8, 18, 0); struct nouveau_grobj *curie = nv40->screen->curie; /*XXX: ignored: @@ -445,7 +445,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(4, 21, 0); struct nouveau_grobj *curie = nv40->screen->curie; so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c index 8cd05ce66ef..3ff00a37f66 100644 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -18,7 +18,7 @@ struct nv40_state_entry nv40_state_blend = { static boolean nv40_state_blend_colour_validate(struct nv40_context *nv40) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv40->blend_colour; so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 789ed16126a..13fe854915b 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -54,9 +54,10 @@ nv40_state_do_validate(struct nv40_context *nv40, void nv40_state_emit(struct nv40_context *nv40) { - struct nouveau_channel *chan = nv40->screen->base.channel; struct nv40_state *state = &nv40->state; struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned i; uint64_t states; @@ -80,10 +81,10 @@ nv40_state_emit(struct nv40_context *nv40) if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | (1ULL << NV40_STATE_FRAGTEX0))) { - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 1); } state->dirty = 0; diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 1c7a7cd64f0..a58fe9ddb19 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -19,7 +19,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) struct nv04_surface *rt[4], *zeta; uint32_t rt_enable, rt_format; int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(18, 24, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index cf58d33906a..753a505e934 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -12,7 +12,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40) return FALSE; nv40->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); if (nv40->state.scissor_enabled) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index b51024ad9b2..2b371ebfec0 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -14,14 +14,14 @@ nv40_state_stipple_validate(struct nv40_context *nv40) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv40->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 665d2d5fcac..9919ba1d0b0 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -19,7 +19,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40) return FALSE; nv40->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(2, 9, 0); if (!bypass) { so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index b084a38b482..791ee6823d3 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -16,14 +16,14 @@ struct nv40_transfer { }; static void -nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv40_compatible_transfer_tex(pt, level, &tx_tex_template); + nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv40_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index af3fcf6a348..a777898f688 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -164,18 +164,21 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; nv40_vbo_set_idxbuf(nv40, NULL, 0); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } while (count) { @@ -186,17 +189,17 @@ nv40_draw_arrays(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,29 +209,30 @@ nv40_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } pipe->flush(pipe, 0, NULL); - return TRUE; } static INLINE void nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv40_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv40_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void nv40_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; while (count) { @@ -412,17 +421,17 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -459,8 +466,9 @@ nv40_draw_elements(struct pipe_context *pipe, idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } if (idxbuf) { @@ -471,7 +479,6 @@ nv40_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -484,9 +491,9 @@ nv40_vbo_validate(struct nv40_context *nv40) unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); for (hw = 0; hw < nv40->vtxelt_nr; hw++) { @@ -499,7 +506,7 @@ nv40_vbo_validate(struct nv40_context *nv40) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index d9fc31006f5..8d80fcad38e 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -834,7 +834,9 @@ static boolean nv40_vertprog_validate(struct nv40_context *nv40) { struct pipe_screen *pscreen = nv40->pipe.screen; - struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; struct nv40_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -884,7 +886,7 @@ check_gpu_resources: assert(0); } - so = so_new(7, 0); + so = so_new(3, 4, 0); so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); @@ -974,9 +976,9 @@ check_gpu_resources: 4 * sizeof(float)); } - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -993,11 +995,11 @@ check_gpu_resources: NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); } #endif - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index d21b80eab8d..5997456e4c9 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -43,6 +43,39 @@ nv50_destroy(struct pipe_context *pipe) { struct nv50_context *nv50 = nv50_context(pipe); + if (nv50->state.fb) + so_ref(NULL, &nv50->state.fb); + if (nv50->state.blend) + so_ref(NULL, &nv50->state.blend); + if (nv50->state.blend_colour) + so_ref(NULL, &nv50->state.blend_colour); + if (nv50->state.zsa) + so_ref(NULL, &nv50->state.zsa); + if (nv50->state.rast) + so_ref(NULL, &nv50->state.rast); + if (nv50->state.stipple) + so_ref(NULL, &nv50->state.stipple); + if (nv50->state.scissor) + so_ref(NULL, &nv50->state.scissor); + if (nv50->state.viewport) + so_ref(NULL, &nv50->state.viewport); + if (nv50->state.tsc_upload) + so_ref(NULL, &nv50->state.tsc_upload); + if (nv50->state.tic_upload) + so_ref(NULL, &nv50->state.tic_upload); + if (nv50->state.vertprog) + so_ref(NULL, &nv50->state.vertprog); + if (nv50->state.fragprog) + so_ref(NULL, &nv50->state.fragprog); + if (nv50->state.programs) + so_ref(NULL, &nv50->state.programs); + if (nv50->state.vtxfmt) + so_ref(NULL, &nv50->state.vtxfmt); + if (nv50->state.vtxbuf) + so_ref(NULL, &nv50->state.vtxbuf); + if (nv50->state.vtxattr) + so_ref(NULL, &nv50->state.vtxattr); + draw_destroy(nv50->draw); FREE(nv50); } diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 5578a5838fb..cbd4c3ff86d 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -191,9 +191,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ -extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv50_draw_elements(struct pipe_context *pipe, +extern void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3f1edf0a139..cecb1efc900 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -145,7 +145,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); FREE(mt); return NULL; @@ -188,7 +188,7 @@ nv50_miptree_destroy(struct pipe_texture *pt) struct nv50_miptree *mt = nv50_miptree(pt); unsigned l; - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); nouveau_bo_ref(NULL, &mt->base.bo); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 679c28ce4b1..069f8159381 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -96,7 +96,11 @@ struct nv50_reg { #define NV50_MOD_NEG 1 #define NV50_MOD_ABS 2 +#define NV50_MOD_NEG_ABS (NV50_MOD_NEG | NV50_MOD_ABS) #define NV50_MOD_SAT 4 +#define NV50_MOD_I32 8 + +/* NV50_MOD_I32 is used to indicate integer mode for neg/abs */ /* STACK: Conditionals and loops have to use the (per warp) stack. * Stack entries consist of an entry type (divergent path, join at), @@ -134,6 +138,7 @@ struct nv50_pc { uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ struct nv50_reg *temp_temp[16]; + struct nv50_program_exec *temp_temp_exec[16]; unsigned temp_temp_nr; /* broadcast and destination replacement regs */ @@ -154,26 +159,17 @@ struct nv50_pc { int if_lvl, loop_lvl; unsigned loop_pos[NV50_MAX_LOOP_NESTING]; + unsigned *insn_pos; /* actual program offset of each TGSI insn */ + boolean in_subroutine; + /* current instruction and total number of insns */ unsigned insn_cur; unsigned insn_nr; boolean allow32; -}; - -static INLINE struct nv50_reg * -reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) -{ - struct nv50_reg *ri; - assert(pc->reg_instance_nr < 16); - ri = &pc->reg_instances[pc->reg_instance_nr++]; - if (reg) { - *ri = *reg; - reg->mod = 0; - } - return ri; -} + uint8_t edgeflag_out; +}; static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) @@ -250,7 +246,23 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); +} + +static INLINE struct nv50_reg * +reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) +{ + struct nv50_reg *ri; + + assert(pc->reg_instance_nr < 16); + ri = &pc->reg_instances[pc->reg_instance_nr++]; + if (reg) { + alloc_reg(pc, reg); + *ri = *reg; + reg->mod = 0; + } + return ri; } /* XXX: For shaders that aren't executed linearly (e.g. shaders that @@ -275,26 +287,11 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); return NULL; } -/* Assign the hw of the discarded temporary register src - * to the tgsi register dst and free src. - */ -static void -assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - assert(src->index == -1 && src->hw != -1); - - if (dst->hw != -1) - pc->r_temp[dst->hw] = NULL; - pc->r_temp[src->hw] = dst; - dst->hw = src->hw; - - FREE(src); -} - /* release the hardware resource held by r */ static void release_hw(struct nv50_pc *pc, struct nv50_reg *r) @@ -353,23 +350,29 @@ free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) } static struct nv50_reg * -temp_temp(struct nv50_pc *pc) +temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { if (pc->temp_temp_nr >= 16) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + pc->temp_temp_exec[pc->temp_temp_nr] = e; return pc->temp_temp[pc->temp_temp_nr++]; } +/* This *must* be called for all nv50_program_exec that have been + * given as argument to temp_temp, or the temps will be leaked ! + */ static void -kill_temp_temp(struct nv50_pc *pc) +kill_temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { int i; for (i = 0; i < pc->temp_temp_nr; i++) - free_temp(pc, pc->temp_temp[i]); - pc->temp_temp_nr = 0; + if (pc->temp_temp_exec[i] == e) + free_temp(pc, pc->temp_temp[i]); + if (!e) + pc->temp_temp_nr = 0; } static int @@ -431,6 +434,8 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e) p->exec_head = e; p->exec_tail = e; p->exec_size += (e->inst[0] & 1) ? 2 : 1; + + kill_temp_temp(pc, e); } static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); @@ -451,10 +456,19 @@ is_immd(struct nv50_program_exec *e) return FALSE; } +static boolean +is_join(struct nv50_program_exec *e) +{ + if (is_long(e) && (e->inst[1] & 3) == 2) + return TRUE; + return FALSE; +} + static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) { + assert(!is_immd(e)); set_long(pc, e); e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); e->inst[1] |= (pred << 7) | (idx << 12); @@ -497,15 +511,6 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) static INLINE void set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { - union { - float f; - uint32_t ui; - } u; - u.ui = pc->immd_buf[imm->hw]; - - u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f; - u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f; - set_long(pc, e); /* XXX: can't be predicated - bits overlap; cases where both * are required should be avoided by using pc->allow32 */ @@ -513,8 +518,8 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) set_pred_wr(pc, 0, 0, e); e->inst[1] |= 0x00000002 | 0x00000001; - e->inst[0] |= (u.ui & 0x3f) << 16; - e->inst[1] |= (u.ui >> 6) << 2; + e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16; + e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2; } static INLINE void @@ -663,6 +668,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); } +/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */ static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -715,6 +721,34 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) FREE(imm); } +/* Assign the hw of the discarded temporary register src + * to the tgsi register dst and free src. + */ +static void +assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + assert(src->index == -1 && src->hw != -1); + + if (pc->if_lvl || pc->loop_lvl || + (dst->type != P_TEMP) || + (src->hw < pc->result_nr * 4 && + pc->p->type == PIPE_SHADER_FRAGMENT) || + pc->p->info.opcode_count[TGSI_OPCODE_CAL] || + pc->p->info.opcode_count[TGSI_OPCODE_BRA]) { + + emit_mov(pc, dst, src); + free_temp(pc, src); + return; + } + + if (dst->hw != -1) + pc->r_temp[dst->hw] = NULL; + pc->r_temp[src->hw] = dst; + dst->hw = src->hw; + + FREE(src); +} + static void emit_nop(struct nv50_pc *pc) { @@ -757,7 +791,7 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_reg *temp; if (src->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } @@ -776,7 +810,7 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) e->inst[1] |= 0x00200000; } else if (src->type == P_CONST || src->type == P_IMMD) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -792,7 +826,7 @@ static void set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -800,7 +834,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x00800000)); if (e->inst[0] & 0x01000000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -822,7 +856,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) set_long(pc, e); if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -830,7 +864,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x01000000)); if (e->inst[0] & 0x00800000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -845,6 +879,26 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } static void +set_half_src(struct nv50_pc *pc, struct nv50_reg *src, int lh, + struct nv50_program_exec *e, int pos) +{ + struct nv50_reg *r = src; + + alloc_reg(pc, r); + if (r->type != P_TEMP) { + r = temp_temp(pc, e); + emit_mov(pc, r, src); + } + + if (r->hw > (NV50_SU_MAX_TEMP / 2)) { + NOUVEAU_ERR("out of low GPRs\n"); + abort(); + } + + e->inst[pos / 32] |= ((src->hw * 2) + lh) << (pos % 32); +} + +static void emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred) { struct nv50_program_exec *e = exec(pc); @@ -886,7 +940,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_dst(pc, dst, e); set_src_0(pc, src0, e); if (src1->type == P_IMMD && !is_long(e)) { - if (src0->mod & NV50_MOD_NEG) + if (src0->mod ^ src1->mod) e->inst[0] |= 0x00008000; set_immd(pc, src1, e); } else { @@ -948,6 +1002,13 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, emit(pc, e); } +#define NV50_MAX_F32 0x880 +#define NV50_MAX_S32 0x08c +#define NV50_MAX_U32 0x084 +#define NV50_MIN_F32 0x8a0 +#define NV50_MIN_S32 0x0ac +#define NV50_MIN_U32 0x0a4 + static void emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) @@ -955,8 +1016,8 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_program_exec *e = exec(pc); set_long(pc, e); - e->inst[0] |= 0xb0000000; - e->inst[1] |= (sub << 29); + e->inst[0] |= 0x30000000 | ((sub & 0x800) << 20); + e->inst[1] |= (sub << 24); check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, e); @@ -997,6 +1058,8 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, op != TGSI_OPCODE_XOR) assert(!"invalid bit op"); + assert(!(src0->mod | src1->mod)); + if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) { set_immd(pc, src1, e); if (op == TGSI_OPCODE_OR) @@ -1018,6 +1081,69 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, } static void +emit_not(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xd0000000; + e->inst[1] = 0x0402c000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_1(pc, src, e); + + emit(pc, e); +} + +static void +emit_shift(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4000000; + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (src1->type == P_IMMD) { + e->inst[1] |= (1 << 20); + e->inst[0] |= (pc->immd_buf[src1->hw] & 0x7f) << 16; + } else + set_src_1(pc, src1, e); + + if (dir != TGSI_OPCODE_SHL) + e->inst[1] |= (1 << 29); + + if (dir == TGSI_OPCODE_ISHR) + e->inst[1] |= (1 << 27); + + emit(pc, e); +} + +static void +emit_shl_imm(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, int s) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4100000; + if (s < 0) { + e->inst[1] |= 1 << 29; + s = -s; + } + e->inst[1] |= ((s & 0x7f) << 16); + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { @@ -1048,6 +1174,14 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, src2->mod ^= NV50_MOD_NEG; } +#define NV50_FLOP_RCP 0 +#define NV50_FLOP_RSQ 2 +#define NV50_FLOP_LG2 3 +#define NV50_FLOP_SIN 4 +#define NV50_FLOP_COS 5 +#define NV50_FLOP_EX2 6 + +/* rcp, rsqrt, lg2 support neg and abs */ static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) @@ -1055,17 +1189,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0x90000000; - if (sub) { + if (sub || src->mod) { set_long(pc, e); e->inst[1] |= (sub << 29); } set_dst(pc, dst, e); + set_src_0_restricted(pc, src, e); - if (sub == 0 || sub == 2) - set_src_0_restricted(pc, src, e); - else - set_src_0(pc, src, e); + assert(!src->mod || sub < 4); + + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; emit(pc, e); } @@ -1082,6 +1219,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29) | 0x00004000; + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1097,39 +1239,49 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29); + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } -#define CVTOP_RN 0x01 -#define CVTOP_FLOOR 0x03 -#define CVTOP_CEIL 0x05 -#define CVTOP_TRUNC 0x07 -#define CVTOP_SAT 0x08 -#define CVTOP_ABS 0x10 - -/* 0x04 == 32 bit dst */ -/* 0x40 == dst is float */ -/* 0x80 == src is float */ -#define CVT_F32_F32 0xc4 -#define CVT_F32_S32 0x44 -#define CVT_S32_F32 0x8c -#define CVT_S32_S32 0x0c -#define CVT_NEG 0x20 -#define CVT_RI 0x08 +#define CVT_RN (0x00 << 16) +#define CVT_FLOOR (0x02 << 16) +#define CVT_CEIL (0x04 << 16) +#define CVT_TRUNC (0x06 << 16) +#define CVT_SAT (0x08 << 16) +#define CVT_ABS (0x10 << 16) + +#define CVT_X32_X32 0x04004000 +#define CVT_X32_S32 0x04014000 +#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32) +#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32) +#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32) +#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32) +#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32) +#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32) +#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32) +#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32) + +#define CVT_NEG 0x20000000 +#define CVT_RI 0x08000000 static void emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, - int wp, unsigned cvn, unsigned fmt) + int wp, uint32_t cvn) { struct nv50_program_exec *e; e = exec(pc); - set_long(pc, e); - e->inst[0] |= 0xa0000000; - e->inst[1] |= 0x00004000; /* 32 bit src */ - e->inst[1] |= (cvn << 16); - e->inst[1] |= (fmt << 24); + if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG; + if (src->mod & NV50_MOD_ABS) cvn |= CVT_ABS; + + e->inst[0] = 0xa0000000; + e->inst[1] = cvn; + set_long(pc, e); set_src_0(pc, src, e); if (wp >= 0) @@ -1154,10 +1306,12 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, * 0x6 = GE * 0x7 = set condition code ? (used before bra.lt/le/gt/ge) * 0x8 = unordered bit (allows NaN) + * + * mode = 0x04 (u32), 0x0c (s32), 0x80 (f32) */ static void emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, - struct nv50_reg *src0, struct nv50_reg *src1) + struct nv50_reg *src0, struct nv50_reg *src1, uint8_t mode) { static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; @@ -1172,16 +1326,10 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, if (dst && dst->type != P_TEMP) dst = alloc_temp(pc, NULL); - /* set.u32 */ set_long(pc, e); - e->inst[0] |= 0xb0000000; + e->inst[0] |= 0x30000000 | (mode << 24); e->inst[1] |= 0x60000000 | (ccode << 14); - /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but - * that doesn't seem to match what the hw actually does - e->inst[1] |= 0x04000000; << breaks things, u32 by default ? - */ - if (wp >= 0) set_pred_wr(pc, 1, wp, e); if (dst) @@ -1196,33 +1344,146 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, emit(pc, e); - /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */ - if (rdst) - emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32); + if (rdst && mode == 0x80) /* convert to float ? */ + emit_cvt(pc, rdst, dst, -1, CVT_ABS | CVT_F32_S32); if (rdst && rdst != dst) free_temp(pc, dst); } -static INLINE unsigned -map_tgsi_setop_cc(unsigned op) +static INLINE void +map_tgsi_setop_hw(unsigned op, uint8_t *cc, uint8_t *ty) { switch (op) { - case TGSI_OPCODE_SLT: return 0x1; - case TGSI_OPCODE_SGE: return 0x6; - case TGSI_OPCODE_SEQ: return 0x2; - case TGSI_OPCODE_SGT: return 0x4; - case TGSI_OPCODE_SLE: return 0x3; - case TGSI_OPCODE_SNE: return 0xd; + case TGSI_OPCODE_SLT: *cc = 0x1; *ty = 0x80; break; + case TGSI_OPCODE_SGE: *cc = 0x6; *ty = 0x80; break; + case TGSI_OPCODE_SEQ: *cc = 0x2; *ty = 0x80; break; + case TGSI_OPCODE_SGT: *cc = 0x4; *ty = 0x80; break; + case TGSI_OPCODE_SLE: *cc = 0x3; *ty = 0x80; break; + case TGSI_OPCODE_SNE: *cc = 0xd; *ty = 0x80; break; + + case TGSI_OPCODE_ISLT: *cc = 0x1; *ty = 0x0c; break; + case TGSI_OPCODE_ISGE: *cc = 0x6; *ty = 0x0c; break; + case TGSI_OPCODE_USEQ: *cc = 0x2; *ty = 0x04; break; + case TGSI_OPCODE_USGE: *cc = 0x6; *ty = 0x04; break; + case TGSI_OPCODE_USLT: *cc = 0x1; *ty = 0x04; break; + case TGSI_OPCODE_USNE: *cc = 0x5; *ty = 0x04; break; default: assert(0); - return 0; + return; } } +static void +emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *rsrc1) +{ + struct nv50_program_exec *e = exec(pc); + struct nv50_reg *src1; + + e->inst[0] = 0x20000000; + + alloc_reg(pc, rsrc1); + check_swap_src_0_1(pc, &src0, &rsrc1); + + src1 = rsrc1; + if (src0->mod & rsrc1->mod & NV50_MOD_NEG) { + src1 = temp_temp(pc, e); + emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32); + } + + if (!pc->allow32 || src1->hw > 63 || + (src1->type != P_TEMP && src1->type != P_IMMD)) + set_long(pc, e); + + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (is_long(e)) { + e->inst[1] |= 1 << 26; + set_src_2(pc, src1, e); + } else { + e->inst[0] |= 0x8000; + if (src1->type == P_IMMD) + set_immd(pc, src1, e); + else + set_src_1(pc, src1, e); + } + + if (src0->mod & NV50_MOD_NEG) + e->inst[0] |= 1 << 28; + else + if (src1->mod & NV50_MOD_NEG) + e->inst[0] |= 1 << 22; + + emit(pc, e); +} + +static void +emit_mad_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1, + struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x60000000; + if (!pc->allow32) + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != P_TEMP) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_mul_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + + emit(pc, e); +} + +static void +emit_sad(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x50000000; + if (!pc->allow32) + set_long(pc, e); + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != dst->type) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + if (is_long(e)) + e->inst[1] |= 0x0c << 24; + else + e->inst[0] |= 0x81 << 8; + + emit(pc, e); +} + static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI); + emit_cvt(pc, dst, src, -1, CVT_FLOOR | CVT_F32_F32 | CVT_RI); } static void @@ -1231,24 +1492,18 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, { struct nv50_reg *temp = alloc_temp(pc, NULL); - emit_flop(pc, 3, temp, v); + emit_flop(pc, NV50_FLOP_LG2, temp, v); emit_mul(pc, temp, temp, e); emit_preex2(pc, temp, temp); - emit_flop(pc, 6, dst, temp); + emit_flop(pc, NV50_FLOP_EX2, dst, temp); free_temp(pc, temp); } static INLINE void -emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); -} - -static INLINE void emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); + emit_cvt(pc, dst, src, -1, CVT_SAT | CVT_F32_F32); } static void @@ -1266,18 +1521,18 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, if (mask & (3 << 1)) { tmp[0] = alloc_temp(pc, NULL); - emit_minmax(pc, 4, tmp[0], src[0], zero); + emit_minmax(pc, NV50_MAX_F32, tmp[0], src[0], zero); } if (mask & (1 << 2)) { set_pred_wr(pc, 1, 0, pc->p->exec_tail); - tmp[1] = temp_temp(pc); - emit_minmax(pc, 4, tmp[1], src[1], zero); + tmp[1] = temp_temp(pc, NULL); + emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero); - tmp[3] = temp_temp(pc); - emit_minmax(pc, 4, tmp[3], src[3], neg128); - emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + tmp[3] = temp_temp(pc, NULL); + emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128); + emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128); emit_pow(pc, dst[2], tmp[1], tmp[3]); emit_mov(pc, dst[2], zero); @@ -1305,12 +1560,6 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, FREE(one); } -static INLINE void -emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG); -} - static void emit_kil(struct nv50_pc *pc, struct nv50_reg *src) { @@ -1322,80 +1571,62 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) set_long(pc, e); /* sets cond code to ALWAYS */ if (src) { - unsigned cvn = CVT_F32_F32; - set_pred(pc, 0x1 /* cc = LT */, r_pred, e); - - if (src->mod & NV50_MOD_NEG) - cvn |= CVT_NEG; - /* write predicate reg */ - emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); + /* write to predicate reg */ + emit_cvt(pc, NULL, src, r_pred, CVT_F32_F32); } emit(pc, e); } static struct nv50_program_exec * -emit_breakaddr(struct nv50_pc *pc) +emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc) { struct nv50_program_exec *e = exec(pc); - e->inst[0] = 0x40000002; + e->inst[0] = (op << 28) | 2; set_long(pc, e); + if (pred >= 0) + set_pred(pc, cc, pred, e); emit(pc, e); return e; } -static void -emit_break(struct nv50_pc *pc, int pred, unsigned cc) +static INLINE struct nv50_program_exec * +emit_breakaddr(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x50000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); + return emit_control_flow(pc, 0x4, -1, 0); +} - emit(pc, e); +static INLINE void +emit_break(struct nv50_pc *pc, int pred, unsigned cc) +{ + emit_control_flow(pc, 0x5, pred, cc); } -static struct nv50_program_exec * +static INLINE struct nv50_program_exec * emit_joinat(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xa0000002; - set_long(pc, e); - - emit(pc, e); - return e; + return emit_control_flow(pc, 0xa, -1, 0); } -static struct nv50_program_exec * +static INLINE struct nv50_program_exec * emit_branch(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); + return emit_control_flow(pc, 0x1, pred, cc); +} - e->inst[0] = 0x10000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - emit(pc, e); - return pc->p->exec_tail; +static INLINE struct nv50_program_exec * +emit_call(struct nv50_pc *pc, int pred, unsigned cc) +{ + return emit_control_flow(pc, 0x2, pred, cc); } -static void +static INLINE void emit_ret(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x30000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - - emit(pc, e); + emit_control_flow(pc, 0x3, pred, cc); } #define QOP_ADD 0 @@ -1445,8 +1676,8 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], src[1]->mod |= NV50_MOD_ABS; src[2]->mod |= NV50_MOD_ABS; - emit_minmax(pc, 4, t[2], src[0], src[1]); - emit_minmax(pc, 4, t[2], src[2], t[2]); + emit_minmax(pc, NV50_MAX_F32, t[2], src[0], src[1]); + emit_minmax(pc, NV50_MAX_F32, t[2], src[2], t[2]); src[0]->mod = mod[0]; src[1]->mod = mod[1]; @@ -1458,7 +1689,7 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], if (arg == 4) /* there is no textureProj(samplerCubeShadow) */ emit_mov(pc, t[3], src[3]); - emit_flop(pc, 0, t[2], t[2]); + emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]); emit_mul(pc, t[0], src[0], t[2]); emit_mul(pc, t[1], src[1], t[2]); @@ -1476,7 +1707,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], t[3]->rhw = src[3]->rhw; emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID)); - emit_flop(pc, 0, t[3], t[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]); for (c = 0; c < dim; ++c) { t[c]->rhw = src[c]->rhw; @@ -1490,7 +1721,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], /* XXX: for some reason the blob sometimes uses MAD * (mad f32 $rX $rY $rZ neg $r63) */ - emit_flop(pc, 0, t[3], src[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]); for (c = 0; c < dim; ++c) emit_mul(pc, t[c], src[c], t[3]); if (arg != dim) /* depth reference value */ @@ -1537,7 +1768,13 @@ emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod, struct nv50_reg *src, struct nv50_program_exec *tex) { struct nv50_program_exec *join_at; - unsigned i, target = pc->p->exec_size + 7 * 2; + unsigned i, target = pc->p->exec_size + 9 * 2; + + if (pc->p->type != PIPE_SHADER_FRAGMENT) { + emit(pc, tex); + return; + } + pc->allow32 = FALSE; /* Subtract lod of each pixel from lod of top left pixel, jump * texlod insn if result is 0, then repeat for 2 other pixels. @@ -1663,6 +1900,7 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit(pc, e); } else if (bias_lod < 0) { + assert(pc->p->type == PIPE_SHADER_FRAGMENT); e->inst[0] |= arg << 22; e->inst[1] |= 0x20000000; /* texbias */ emit_mov(pc, t[arg], src[3]); @@ -1742,6 +1980,21 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) q = 0x0403c000; m = 0xffff7fff; break; + case 0x2: + case 0x3: + /* ADD, SUB, SUBR b32 */ + m = ~(0x8000 | (127 << 16)); + q = ((e->inst[0] & (~m)) >> 2) | (1 << 26); + break; + case 0x5: + /* SAD */ + m = ~(0x81 << 8); + q = (0x0c << 24) | ((e->inst[0] & (0x7f << 2)) << 12); + break; + case 0x6: + /* MAD u16 */ + q = (e->inst[0] & (0x7f << 2)) << 12; + break; case 0x8: /* INTERP (move centroid, perspective and flat bits) */ m = ~0x03000100; @@ -1778,26 +2031,57 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) } /* Some operations support an optional negation flag. */ -static boolean -negate_supported(const struct tgsi_full_instruction *insn, int i) +static int +get_supported_mods(const struct tgsi_full_instruction *insn, int i) { switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_COS: case TGSI_OPCODE_DDX: case TGSI_OPCODE_DDY: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: - case TGSI_OPCODE_MUL: + case TGSI_OPCODE_EX2: case TGSI_OPCODE_KIL: - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: + case TGSI_OPCODE_LG2: case TGSI_OPCODE_MAD: - return TRUE; + case TGSI_OPCODE_MUL: case TGSI_OPCODE_POW: - if (i == 1) - return TRUE; - return FALSE; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */ + case TGSI_OPCODE_SCS: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SUB: + return NV50_MOD_NEG; + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_INEG: /* tgsi src sign toggle/set would be stupid */ + return NV50_MOD_ABS; + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_TRUNC: + return NV50_MOD_NEG | NV50_MOD_ABS; + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_F2U: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: + return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32; + case TGSI_OPCODE_UADD: + return NV50_MOD_NEG | NV50_MOD_I32; + case TGSI_OPCODE_SAD: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_USHR: + return NV50_MOD_I32; default: - return FALSE; + return 0; } } @@ -1820,7 +2104,9 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) case TGSI_OPCODE_DST: return mask & (c ? 0xa : 0x6); case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: @@ -1902,11 +2188,11 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) static struct nv50_reg * tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, - boolean neg) + int mod) { struct nv50_reg *r = NULL; - struct nv50_reg *temp; - unsigned sgn, c, swz; + struct nv50_reg *temp = NULL; + unsigned sgn, c, swz, cvn; if (src->Register.File != TGSI_FILE_CONSTANT) assert(!src->Register.Indirect); @@ -1946,7 +2232,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = &pc->immd[src->Register.Index * 4 + c]; break; case TGSI_FILE_SAMPLER: - break; + return NULL; case TGSI_FILE_ADDRESS: r = pc->addr[src->Register.Index * 4 + c]; assert(r); @@ -1961,35 +2247,34 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; } + cvn = (mod & NV50_MOD_I32) ? CVT_S32_S32 : CVT_F32_F32; + switch (sgn) { - case TGSI_UTIL_SIGN_KEEP: - break; case TGSI_UTIL_SIGN_CLEAR: - temp = temp_temp(pc); - emit_abs(pc, temp, r); - r = temp; - break; - case TGSI_UTIL_SIGN_TOGGLE: - if (neg) - r->mod = NV50_MOD_NEG; - else { - temp = temp_temp(pc); - emit_neg(pc, temp, r); - r = temp; - } + r->mod = NV50_MOD_ABS; break; case TGSI_UTIL_SIGN_SET: - temp = temp_temp(pc); - emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG); - r = temp; + r->mod = NV50_MOD_NEG_ABS; + break; + case TGSI_UTIL_SIGN_TOGGLE: + r->mod = NV50_MOD_NEG; break; default: - assert(0); + assert(!r->mod && sgn == TGSI_UTIL_SIGN_KEEP); break; } - if (r && r->acc >= 0 && r != temp) - return reg_instance(pc, r); + if ((r->mod & mod) != r->mod) { + temp = temp_temp(pc, NULL); + emit_cvt(pc, temp, r, -1, cvn); + r->mod = 0; + r = temp; + } else + r->mod |= mod & NV50_MOD_I32; + + assert(r); + if (r->acc >= 0 && r != temp) + return reg_instance(pc, r); /* will clear r->mod */ return r; } @@ -2042,6 +2327,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c) assert(0); return 0x0; } + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_LIT: case TGSI_OPCODE_SCS: case TGSI_OPCODE_TEX: @@ -2082,6 +2369,8 @@ nv50_kill_branch(struct nv50_pc *pc) if (pc->if_insn[lvl]->next != pc->p->exec_tail) return FALSE; + if (is_immd(pc->p->exec_tail)) + return FALSE; /* if ccode == 'true', the BRA is from an ELSE and the predicate * reg may no longer be valid, since we currently always use $p0 @@ -2149,22 +2438,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fs = &inst->Src[i]; unsigned src_mask; - boolean neg_supp; + int mod_supp; src_mask = nv50_tgsi_src_mask(inst, i); - neg_supp = negate_supported(inst, i); + mod_supp = get_supported_mods(inst, i); if (fs->Register.File == TGSI_FILE_SAMPLER) unit = fs->Register.Index; for (c = 0; c < 4; c++) if (src_mask & (1 << c)) - src[i][c] = tgsi_src(pc, c, fs, neg_supp); + src[i][c] = tgsi_src(pc, c, fs, mod_supp); } brdc = temp = pc->r_brdc; if (brdc && brdc->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (sat) brdc = temp; } else @@ -2173,7 +2462,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; /* rdst[c] = dst[c]; */ /* done above */ - dst[c] = temp_temp(pc); + dst[c] = temp_temp(pc, NULL); } } @@ -2184,7 +2473,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_abs(pc, dst[c], src[0][c]); + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_ABS | CVT_F32_F32); } break; case TGSI_OPCODE_ADD: @@ -2206,8 +2496,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_ARL: assert(src[0][0]); - temp = temp_temp(pc); - emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32); + temp = temp_temp(pc, NULL); + emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); emit_arl(pc, dst[0], temp, 4); break; case TGSI_OPCODE_BGNLOOP: @@ -2215,16 +2505,28 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_BGNSUB: + assert(!pc->in_subroutine); + pc->in_subroutine = TRUE; + /* probably not necessary, but align to 8 byte boundary */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + break; case TGSI_OPCODE_BRK: assert(pc->loop_lvl > 0); emit_break(pc, -1, 0); break; + case TGSI_OPCODE_CAL: + assert(inst->Label.Label < pc->insn_nr); + emit_call(pc, -1, 0)->param.index = inst->Label.Label; + /* replaced by actual offset in nv50_program_fixup_insns */ + break; case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_CEIL, CVT_F32_F32 | CVT_RI); + CVT_CEIL | CVT_F32_F32 | CVT_RI); } break; case TGSI_OPCODE_CMP: @@ -2232,24 +2534,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32); + emit_cvt(pc, NULL, src[0][c], 1, CVT_F32_F32); emit_mov(pc, dst[c], src[1][c]); set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */ emit_mov(pc, dst[c], src[2][c]); set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */ } break; + case TGSI_OPCODE_CONT: + assert(pc->loop_lvl > 0); + emit_branch(pc, -1, 0)->param.index = + pc->loop_pos[pc->loop_lvl - 1]; + break; case TGSI_OPCODE_COS: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 5, dst[3], temp); + emit_flop(pc, NV50_FLOP_COS, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 5, brdc, temp); + emit_flop(pc, NV50_FLOP_COS, brdc, temp); break; case TGSI_OPCODE_DDX: for (c = 0; c < 4; c++) { @@ -2321,9 +2628,56 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDSUB: + assert(pc->in_subroutine); + pc->in_subroutine = FALSE; + break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); - emit_flop(pc, 6, brdc, temp); + emit_flop(pc, NV50_FLOP_EX2, brdc, temp); + break; + case TGSI_OPCODE_EXP: + { + struct nv50_reg *t[2]; + + assert(!temp); + t[0] = temp_temp(pc, NULL); + t[1] = temp_temp(pc, NULL); + + if (mask & 0x6) + emit_mov(pc, t[0], src[0][0]); + if (mask & 0x3) + emit_flr(pc, t[1], src[0][0]); + + if (mask & (1 << 1)) + emit_sub(pc, dst[1], t[0], t[1]); + if (mask & (1 << 0)) { + emit_preex2(pc, t[1], t[1]); + emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]); + } + if (mask & (1 << 2)) { + emit_preex2(pc, t[0], t[0]); + emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } + break; + case TGSI_OPCODE_F2I: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_TRUNC | CVT_S32_F32); + } + break; + case TGSI_OPCODE_F2U: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_TRUNC | CVT_U32_F32); + } break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -2333,7 +2687,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_FRC: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2341,14 +2695,42 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_sub(pc, dst[c], src[0][c], temp); } break; + case TGSI_OPCODE_I2F: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_S32); + } + break; case TGSI_OPCODE_IF: assert(pc->if_lvl < NV50_MAX_COND_NESTING); - emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN, - CVT_F32_F32); + emit_cvt(pc, NULL, src[0][0], 0, CVT_ABS | CVT_F32_F32); pc->if_join[pc->if_lvl] = emit_joinat(pc); pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);; terminate_mbb(pc); break; + case TGSI_OPCODE_IMAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x08c, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_IMIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x0ac, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_INEG: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_S32_S32 | CVT_NEG); + } + break; case TGSI_OPCODE_KIL: assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]); emit_kil(pc, src[0][0]); @@ -2363,10 +2745,38 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - emit_flop(pc, 3, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]); + break; + case TGSI_OPCODE_LOG: + { + struct nv50_reg *t[2]; + + t[0] = temp_temp(pc, NULL); + if (mask & (1 << 1)) + t[1] = temp_temp(pc, NULL); + else + t[1] = t[0]; + + emit_cvt(pc, t[0], src[0][0], -1, CVT_ABS | CVT_F32_F32); + emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], t[1]); + emit_flr(pc, t[1], t[1]); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], t[1]); + if (mask & (1 << 1)) { + t[1]->mod = NV50_MOD_NEG; + emit_preex2(pc, t[1], t[1]); + t[1]->mod = 0; + emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]); + emit_mul(pc, dst[1], t[0], t[1]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } break; case TGSI_OPCODE_LRP: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2385,14 +2795,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + emit_minmax(pc, 0x880, dst[c], src[0][c], src[1][c]); } break; case TGSI_OPCODE_MIN: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + emit_minmax(pc, 0x8a0, dst[c], src[0][c], src[1][c]); } break; case TGSI_OPCODE_MOV: @@ -2409,44 +2819,73 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mul(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_NOT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_not(pc, dst[c], src[0][c]); + } + break; case TGSI_OPCODE_POW: emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: - emit_flop(pc, 0, brdc, src[0][0]); + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; + emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: - if (pc->p->type == PIPE_SHADER_FRAGMENT) + if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine) nv50_fp_move_results(pc); emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: - emit_flop(pc, 2, brdc, src[0][0]); + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; + src[0][0]->mod |= NV50_MOD_ABS; + emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); + break; + case TGSI_OPCODE_SAD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_sad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + } break; case TGSI_OPCODE_SCS: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) - emit_flop(pc, 5, dst[0], temp); + emit_flop(pc, NV50_FLOP_COS, dst[0], temp); if (mask & (1 << 1)) - emit_flop(pc, 4, dst[1], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[1], temp); if (mask & (1 << 2)) emit_mov_immdval(pc, dst[2], 0.0); if (mask & (1 << 3)) emit_mov_immdval(pc, dst[3], 1.0); break; + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_USHR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_shift(pc, dst[c], src[0][c], src[1][c], + inst->Instruction.Opcode); + } + break; case TGSI_OPCODE_SIN: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 4, dst[3], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 4, brdc, temp); + emit_flop(pc, NV50_FLOP_SIN, brdc, temp); break; case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: @@ -2454,12 +2893,23 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_SGT: case TGSI_OPCODE_SLE: case TGSI_OPCODE_SNE: - i = map_tgsi_setop_cc(inst->Instruction.Opcode); + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + { + uint8_t cc, ty; + + map_tgsi_setop_hw(inst->Instruction.Opcode, &cc, &ty); + for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]); + emit_set(pc, cc, dst[c], -1, src[0][c], src[1][c], ty); } + } break; case TGSI_OPCODE_SUB: for (c = 0; c < 4; c++) { @@ -2489,11 +2939,72 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_TRUNC, CVT_F32_F32 | CVT_RI); + CVT_TRUNC | CVT_F32_F32 | CVT_RI); + } + break; + case TGSI_OPCODE_U2F: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32); + } + break; + case TGSI_OPCODE_UADD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_add_b32(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_UMAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x084, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_UMIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_UMAD: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, temp, src[0][c], 0, src[1][c], 0, + temp); + emit_add_b32(pc, dst[c], temp, src[2][c]); + } + } + break; + case TGSI_OPCODE_UMUL: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, dst[c], src[0][c], 0, src[1][c], 0, + temp); + } + } + break; case TGSI_OPCODE_XPD: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & (1 << 0)) { emit_mul(pc, temp, src[0][2], src[1][1]); emit_msb(pc, dst[0], src[0][1], src[1][2], temp); @@ -2510,6 +3021,17 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mov_immdval(pc, dst[3], 1.0); break; case TGSI_OPCODE_END: + if (pc->p->type == PIPE_SHADER_FRAGMENT) + nv50_fp_move_results(pc); + + /* last insn must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + else + if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail)) + emit_nop(pc); + + pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -2536,7 +3058,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } } - kill_temp_temp(pc); + kill_temp_temp(pc, NULL); pc->reg_instance_nr = 0; return TRUE; @@ -2545,7 +3067,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, static void prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) { - struct nv50_reg *reg = NULL; + struct nv50_reg *r, *reg = NULL; const struct tgsi_full_src_register *src; const struct tgsi_dst_register *dst; unsigned i, c, k, mask; @@ -2554,10 +3076,16 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) mask = dst->WriteMask; if (dst->File == TGSI_FILE_TEMPORARY) - reg = pc->temp; + reg = pc->temp; else - if (dst->File == TGSI_FILE_OUTPUT) - reg = pc->result; + if (dst->File == TGSI_FILE_OUTPUT) { + reg = pc->result; + + if (insn->Instruction.Opcode == TGSI_OPCODE_MOV && + dst->Index == pc->edgeflag_out && + insn->Src[0].Register.File == TGSI_FILE_INPUT) + pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index; + } if (reg) { for (c = 0; c < 4; c++) { @@ -2585,7 +3113,15 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) continue; k = tgsi_util_get_full_src_register_swizzle(src, c); - reg[src->Register.Index * 4 + k].acc = pc->insn_nr; + r = ®[src->Register.Index * 4 + k]; + + /* If used before written, pre-allocate the reg, + * lest we overwrite results from a subroutine. + */ + if (!r->acc && r->type == P_TEMP) + alloc_reg(pc, r); + + r->acc = pc->insn_nr; } } } @@ -2674,7 +3210,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { unsigned chn, mask = nv50_tgsi_src_mask(insn, i); - boolean neg_supp = negate_supported(insn, i); + int ms = get_supported_mods(insn, i); fs = &insn->Src[i]; if (fs->Register.File != fd->Register.File || @@ -2692,10 +3228,12 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, if (!(fd->Register.WriteMask & (1 << c))) continue; - /* no danger if src is copied to TEMP first */ - if ((s != TGSI_UTIL_SIGN_KEEP) && - (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) - continue; + if (s == TGSI_UTIL_SIGN_TOGGLE && !(ms & NV50_MOD_NEG)) + continue; + if (s == TGSI_UTIL_SIGN_CLEAR && !(ms & NV50_MOD_ABS)) + continue; + if ((s == TGSI_UTIL_SIGN_SET) && ((ms & 3) != 3)) + continue; rdep[c] |= nv50_tgsi_dst_revdep( insn->Instruction.Opcode, i, chn); @@ -2719,12 +3257,12 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (is_scalar_op(insn.Instruction.Opcode)) { pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); if (!pc->r_brdc) - pc->r_brdc = temp_temp(pc); + pc->r_brdc = temp_temp(pc, NULL); return nv50_program_tx_insn(pc, &insn); } pc->r_brdc = NULL; - if (!deqs) + if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3])) return nv50_program_tx_insn(pc, &insn); deqs = nv50_revdep_reorder(m, rdep); @@ -2775,7 +3313,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); - emit_flop(pc, 0, iv, iv); + emit_flop(pc, NV50_FLOP_RCP, iv, iv); /* XXX: when loading interpolants dynamically, move these * to the program head, or make sure it can't be skipped. @@ -2856,6 +3394,9 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (p->cfg.io_nr > first) p->cfg.io_nr = first; break; + case TGSI_SEMANTIC_EDGEFLAG: + pc->edgeflag_out = first; + break; /* case TGSI_SEMANTIC_CLIP_DISTANCE: p->cfg.clpd = MIN2(p->cfg.clpd, first); @@ -3081,6 +3622,8 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc->attr); if (pc->temp) FREE(pc->temp); + if (pc->insn_pos) + FREE(pc->insn_pos); FREE(pc); } @@ -3104,6 +3647,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; @@ -3192,16 +3737,6 @@ nv50_program_fixup_insns(struct nv50_pc *pc) if (e->param.index >= 0 && !e->param.mask) bra_list[n++] = e; - /* last instruction must be long so it can have the exit bit set */ - if (!is_long(pc->p->exec_tail)) - convert_to_long(pc, pc->p->exec_tail); - /* set exit bit */ - pc->p->exec_tail->inst[1] |= 1; - - /* !immd on exit insn simultaneously means !join */ - assert(!is_immd(pc->p->exec_head)); - assert(!is_immd(pc->p->exec_tail)); - /* Make sure we don't have any single 32 bit instructions. */ for (e = pc->p->exec_head, pos = 0; e; e = e->next) { pos += is_long(e) ? 2 : 1; @@ -3210,12 +3745,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc) for (i = 0; i < n; ++i) if (bra_list[i]->param.index >= pos) bra_list[i]->param.index += 1; + for (i = 0; i < pc->insn_nr; ++i) + if (pc->insn_pos[i] >= pos) + pc->insn_pos[i] += 1; convert_to_long(pc, e); ++pos; } } FREE(bra_list); + + if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL]) + return; + + /* fill in CALL offsets */ + for (e = pc->p->exec_head; e; e = e->next) { + if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2) + e->param.index = pc->insn_pos[e->param.index]; + } } static boolean @@ -3237,19 +3784,20 @@ nv50_program_tx(struct nv50_program *p) if (ret == FALSE) goto out_cleanup; + pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned)); + tgsi_parse_init(&parse, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { const union tgsi_full_token *tok = &parse.FullToken; - /* don't allow half insn/immd on first and last instruction */ + /* previously allow32 was FALSE for first & last instruction */ pc->allow32 = TRUE; - if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr) - pc->allow32 = FALSE; tgsi_parse_token(&parse); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: + pc->insn_pos[pc->insn_cur] = pc->p->exec_size; ++pc->insn_cur; ret = nv50_tgsi_insn(pc, tok); if (ret == FALSE) @@ -3260,9 +3808,6 @@ nv50_program_tx(struct nv50_program *p) } } - if (pc->p->type == PIPE_SHADER_FRAGMENT) - nv50_fp_move_results(pc); - nv50_program_fixup_insns(pc); p->param_nr = pc->param_nr * 4; @@ -3434,7 +3979,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(13, 2); + so = so_new(5, 8, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3470,7 +4015,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(64, 2); + so = so_new(6, 7, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3480,7 +4025,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); + so_method(so, tesla, NV50TCL_FP_CONTROL, 1); so_data (so, p->cfg.regs[2]); so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); so_data (so, p->cfg.regs[3]); @@ -3490,12 +4035,13 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } -static void +static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { struct nv50_program *fp = nv50->fragprog; struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; + uint32_t origin = 0x00000010; /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in @@ -3529,7 +4075,9 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) if (mode == PIPE_SPRITE_COORD_NONE) { m += n; continue; - } + } else + if (mode == PIPE_SPRITE_COORD_LOWER_LEFT) + origin = 0; } /* this is either PointCoord or replaced by sprite coords */ @@ -3540,6 +4088,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) ++m; } } + return origin; } static int @@ -3638,7 +4187,7 @@ nv50_linkage_validate(struct nv50_context *nv50) } /* now fill the stateobj */ - so = so_new(64, 0); + so = so_new(7, 57, 0); n = (m + 3) / 4; so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); @@ -3652,11 +4201,13 @@ nv50_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); so_data (so, reg[4]); - so_method(so, tesla, 0x1540, 4); + so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4); so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { - nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); + so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1); + so_data (so, + nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff)); so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); so_datap (so, pcrd, 8); diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 4a90c372ce3..461fec1d89c 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -58,6 +58,7 @@ struct nv50_program { /* VP only */ uint8_t clpd, clpd_nr; uint8_t psiz; + uint8_t edgeflag_in; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 268c9823f7d..5a4ab3508b8 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_query *q = nv50_query(pq); - BEGIN_RING(chan, tesla, 0x1530, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1); OUT_RING (chan, 1); - BEGIN_RING(chan, tesla, 0x1514, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1); OUT_RING (chan, 1); q->ready = FALSE; @@ -111,7 +111,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!q->ready) { ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD | - wait ? 0 : NOUVEAU_BO_NOWAIT); + (wait ? 0 : NOUVEAU_BO_NOWAIT)); if (ret) return false; q->result = ((uint32_t *)q->bo->map)[1]; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index d443ca3ad06..28e2b35deaa 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -128,7 +128,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; + return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case NOUVEAU_CAP_HW_VTXBUF: @@ -165,6 +165,21 @@ static void nv50_screen_destroy(struct pipe_screen *pscreen) { struct nv50_screen *screen = nv50_screen(pscreen); + unsigned i; + + for (i = 0; i < 2; i++) { + if (screen->constbuf_parm[i]) + nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); + } + + if (screen->constbuf_misc[0]) + nouveau_bo_ref(NULL, &screen->constbuf_misc[0]); + if (screen->tic) + nouveau_bo_ref(NULL, &screen->tic); + if (screen->tsc) + nouveau_bo_ref(NULL, &screen->tsc); + if (screen->static_init) + so_ref(NULL, &screen->static_init); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->tesla); @@ -174,6 +189,28 @@ nv50_screen_destroy(struct pipe_screen *pscreen) FREE(screen); } +static int +nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, + unsigned usage) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *ctx = screen->cur_ctx; + + if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX)) + return 0; + + /* Our vtxbuf got mapped, it can no longer be considered part of current + * state, remove it to avoid emitting reloc markers. + */ + if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf, + nouveau_bo(pb))) { + so_ref(NULL, &ctx->state.vtxbuf); + ctx->dirty |= NV50_NEW_ARRAYS; + } + + return 0; +} + struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { @@ -201,6 +238,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->is_format_supported = nv50_screen_is_format_supported; + screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map; nv50_screen_init_miptree_functions(pscreen); nv50_transfer_init_screen_functions(pscreen); @@ -213,7 +251,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->m2mf, 1); /* 2D object */ ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d); @@ -222,7 +259,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->eng2d, 2); /* 3D object */ switch (chipset & 0xf0) { @@ -231,8 +267,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) break; case 0x80: case 0x90: - /* this stupid name should be corrected. */ - tesla_class = NV54TCL; + tesla_class = NV84TCL; break; case 0xa0: switch (chipset) { @@ -242,7 +277,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) tesla_class = NVA0TCL; break; default: - tesla_class = 0x8597; + tesla_class = NVA8TCL; break; } break; @@ -259,7 +294,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->tesla, 3); /* Sync notifier */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); @@ -270,7 +304,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static M2MF init */ - so = so_new(32, 0); + so = so_new(1, 3, 0); so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); @@ -279,7 +313,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref (NULL, &so); /* Static 2D init */ - so = so_new(64, 0); + so = so_new(4, 7, 0); so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); @@ -287,7 +321,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, chan->vram->handle); so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); so_data (so, NV50_2D_OPERATION_SRCCOPY); - so_method(so, screen->eng2d, 0x0290, 1); + so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1); so_data (so, 0); so_method(so, screen->eng2d, 0x0888, 1); so_data (so, 1); @@ -295,36 +329,35 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(256, 20); + so = so_new(40, 84, 20); - so_method(so, screen->tesla, 0x1558, 1); - so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); + so_data (so, NV50TCL_COND_MODE_ALWAYS); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), - NV50TCL_DMA_UNK0__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11); + for (i = 0; i < 11; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), - NV50TCL_DMA_UNK1__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0), + NV50TCL_DMA_COLOR__SIZE); + for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, 0x121c, 1); + so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1); so_data (so, 1); /* activate all 32 lanes (threads) in a warp */ - so_method(so, screen->tesla, 0x19a0, 1); + so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1); so_data (so, 0x2); so_method(so, screen->tesla, 0x1400, 1); so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, 0x13b4, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); so_data (so, 0x54); - so_method(so, screen->tesla, 0x13bc, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); so_data (so, 0x54); /* origin is top left (set to 1 for bottom left) */ - so_method(so, screen->tesla, 0x13ac, 1); + so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, 8); @@ -360,7 +393,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) // B = buffer ID (maybe more than 1 byte) // N = CB index used in shader instruction // P = program type (0 = VP, 2 = GP, 3 = FP) - so_method(so, screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x000BBNP1); */ @@ -424,23 +457,26 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { - so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2); + so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_data (so, 0x000000ff); so_data (so, 0xffffffff); } - so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2); so_data (so, fui(0.0)); so_data (so, fui(1.0)); /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */ - so_method(so, screen->tesla, 0x1234, 1); + so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1); so_data (so, 1); /* activate first scissor rectangle */ - so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1); + so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1); so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, 1); /* default edgeflag to TRUE */ + so_emit(chan, so); so_ref (so, &screen->static_init); so_ref (NULL, &so); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 61e24a5b571..a038a4e3c2a 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -2,6 +2,7 @@ #define __NV50_SCREEN_H__ #include "nouveau/nouveau_screen.h" +#include "nv50_context.h" struct nv50_screen { struct nouveau_screen base; @@ -9,6 +10,7 @@ struct nv50_screen { struct nouveau_winsys *nvws; unsigned cur_pctx; + struct nv50_context *cur_ctx; struct nouveau_grobj *tesla; struct nouveau_grobj *eng2d; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index d609b4cbc6f..1bbbbdd5f08 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -35,7 +35,7 @@ static void * nv50_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(5, 24, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); unsigned cmask = 0, i; @@ -146,7 +146,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, (wrap_mode(cso->wrap_r) << 6)); switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; break; @@ -157,7 +156,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, } switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MINF_LINEAR; break; @@ -280,7 +278,7 @@ static void * nv50_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(15, 21, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_rasterizer_stateobj *rso = CALLOC_STRUCT(nv50_rasterizer_stateobj); @@ -295,7 +293,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : NV50TCL_SHADE_MODEL_SMOOTH); - so_method(so, tesla, 0x1684, 1); + so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1); so_data (so, cso->flatshade_first ? 0 : 1); so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); @@ -392,7 +390,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); so_data (so, fui(cso->offset_scale)); so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); - so_data (so, fui(cso->offset_units)); + so_data (so, fui(cso->offset_units * 2.0f)); } rso->pipe = *cso; @@ -425,7 +423,7 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(8, 22, 0); so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); so_data (so, cso->depth.writemask ? 1 : 0); @@ -439,9 +437,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } - /* XXX: keep hex values until header is updated (names reversed) */ if (cso->stencil[0].enabled) { - so_method(so, tesla, 0x1380, 8); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); @@ -451,23 +448,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, cso->stencil[0].writemask); so_data (so, cso->stencil[0].valuemask); } else { - so_method(so, tesla, 0x1380, 1); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, tesla, 0x1594, 5); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, tesla, 0x0f54, 3); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); so_data (so, cso->stencil[1].ref_value); so_data (so, cso->stencil[1].writemask); so_data (so, cso->stencil[1].valuemask); } else { - so_method(so, tesla, 0x1594, 1); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 871e8097b65..f83232f43cf 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -33,7 +33,7 @@ static void nv50_state_validate_fb(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(128, 18); + struct nouveau_stateobj *so = so_new(32, 79, 18); struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i, w, h, gw = 0; @@ -41,7 +41,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) * FP result 0 always goes to RT[0], bits 4 - 6 are ignored. * Ambiguous assignment results in no rendering (no DATA_ERROR). */ - so_method(so, tesla, 0x121c, 1); + so_method(so, tesla, NV50TCL_RT_CONTROL, 1); so_data (so, fb->nr_cbufs | (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) | (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25)); @@ -87,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1224, 1); + so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1); so_data (so, 1); } @@ -124,22 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 1); so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3); so_data (so, fb->zsbuf->width); so_data (so, fb->zsbuf->height); so_data (so, 0x00010001); } else { - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 0); } - so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); so_data (so, w << 16); so_data (so, h << 16); /* set window lower left corner */ - so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2); + so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2); so_data (so, 0); so_data (so, 0); /* set screen scissor rectangle */ @@ -185,6 +185,9 @@ nv50_state_emit(struct nv50_context *nv50) struct nv50_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; + /* I don't want to copy headers from the winsys. */ + screen->cur_ctx = nv50; + if (nv50->pctx_id != screen->cur_pctx) { if (nv50->state.fb) nv50->state.dirty |= NV50_NEW_FRAMEBUFFER; @@ -296,7 +299,7 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(nv50->rasterizer->so, &nv50->state.rast); if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { - so = so_new(5, 0); + so = so_new(1, 4, 0); so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); so_data (so, fui(nv50->blend_colour.color[0])); so_data (so, fui(nv50->blend_colour.color[1])); @@ -307,7 +310,7 @@ nv50_state_validate(struct nv50_context *nv50) } if (nv50->dirty & NV50_NEW_STIPPLE) { - so = so_new(33, 0); + so = so_new(1, 32, 0); so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, util_bswap32(nv50->stipple.stipple[i])); @@ -324,8 +327,8 @@ nv50_state_validate(struct nv50_context *nv50) goto scissor_uptodate; nv50->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); - so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2); + so = so_new(1, 2, 0); + so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); if (nv50->state.scissor_enabled) { so_data(so, (s->maxx << 16) | s->minx); so_data(so, (s->maxy << 16) | s->miny); @@ -353,13 +356,13 @@ scissor_uptodate: goto viewport_uptodate; nv50->state.viewport_bypass = bypass; - so = so_new(14, 0); + so = so_new(5, 9, 0); if (!bypass) { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); so_data (so, fui(nv50->viewport.translate[0])); so_data (so, fui(nv50->viewport.translate[1])); so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); so_data (so, fui(nv50->viewport.scale[0])); so_data (so, fui(nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); @@ -397,7 +400,8 @@ viewport_uptodate: for (i = 0; i < PIPE_SHADER_TYPES; ++i) nr += nv50->sampler_nr[i]; - so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4); + so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES + + nr * 8, PIPE_SHADER_TYPES * 2); nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); @@ -440,7 +444,7 @@ void nv50_so_init_sifc(struct nv50_context *nv50, so_data (so, 1); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2); + so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); so_data (so, 0); so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM); so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 79655fc08d5..6378132979e 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -176,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, if (ret) return; - BEGIN_RING(chan, eng2d, 0x0580, 3); - OUT_RING (chan, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3); + OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES); OUT_RING (chan, format); OUT_RING (chan, value); - BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4); OUT_RING (chan, destx); OUT_RING (chan, desty); OUT_RING (chan, width); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index c4ca096d6ac..bef548b7286 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -199,16 +199,18 @@ nv50_tex_validate(struct nv50_context *nv50) { struct nouveau_stateobj *so; struct nouveau_grobj *tesla = nv50->screen->tesla; - unsigned p, push, nrlc; + unsigned p, start, push, nrlc; - for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) { + for (nrlc = 0, start = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) { + start += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); nrlc += nv50->miptree_nr[p]; } - push = push * 11 + 23 * PIPE_SHADER_TYPES + 4; + start = start * 2 + 4 * PIPE_SHADER_TYPES + 2; + push = push * 9 + 19 * PIPE_SHADER_TYPES + 2; nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES; - so = so_new(push, nrlc); + so = so_new(start, push, nrlc); if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE || nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) { diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 4d9afa6fedc..a2f1db2914c 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -47,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); OUT_RING (chan, src_pitch); src_offset += (sy * src_pitch) + (sx * cpp); } else { @@ -66,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); OUT_RING (chan, dst_pitch); dst_offset += (dy * dst_pitch) + (dx * cpp); } else { @@ -89,7 +89,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RELOCh(chan, src_bo, src_offset, src_reloc); OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); + NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); if (src_bo->tile_flags) { @@ -107,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, dst_offset += (line_count * dst_pitch); } BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); + NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); OUT_RING (chan, width * cpp); OUT_RING (chan, line_count); OUT_RING (chan, 0x00000101); @@ -291,7 +291,7 @@ nv50_upload_sifc(struct nv50_context *nv50, /* NV50_2D_OPERATION_SRCCOPY assumed already set */ - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2); + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); OUT_RING (chan, 0); OUT_RING (chan, src_format); BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); @@ -334,6 +334,6 @@ nv50_upload_sifc(struct nv50_context *nv50, src += src_pitch; } - BEGIN_RING(chan, tesla, 0x1440, 1); + BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f7fa0659e8c..f2e510fba61 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -99,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c) { static const uint32_t hw_values[] = { 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16, 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 }; + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 }; /* we'd also have R11G11B10 and R10G10B10A2 */ @@ -152,7 +152,7 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) return (hw_type | hw_size); } -boolean +void nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { @@ -182,7 +182,9 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); - return ret; + /* XXX: not sure what to do if ret != TRUE: flush and retry? + */ + assert(ret); } static INLINE boolean @@ -198,7 +200,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, return nv50_push_elements_u08(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -208,7 +210,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -231,7 +233,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, return nv50_push_elements_u16(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -241,7 +243,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -266,7 +268,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, 0x400015e8, nr); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -275,7 +277,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, return TRUE; } -boolean +void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -317,8 +319,10 @@ nv50_draw_elements(struct pipe_context *pipe, OUT_RING (chan, 0); pipe_buffer_unmap(pscreen, indexBuffer); - - return ret; + + /* XXX: what to do if ret != TRUE? Flush and retry? + */ + assert(ret); } static INLINE boolean @@ -350,7 +354,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so = *pso; if (!so) - *pso = so = so_new(nv50->vtxelt_nr * 5, 0); + *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); switch (ve->nr_components) { case 4: @@ -372,6 +376,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so_data (so, fui(v[1])); break; case 1: + if (attrib == nv50->vertprog->cfg.edgeflag_in) { + so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, v[0] ? 1 : 0); + } so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1); so_data (so, fui(v[0])); break; @@ -401,11 +409,14 @@ nv50_vbo_validate(struct nv50_context *nv50) !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; + if (nv50->vertprog->cfg.edgeflag_in < 16) + nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ + n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); vtxattr = NULL; - vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4); - vtxfmt = so_new(n_ve + 1, 0); + vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4); + vtxfmt = so_new(1, n_ve, 0); so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); for (i = 0; i < nv50->vtxelt_nr; i++) { @@ -445,7 +456,7 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); /* vertex array limits */ - so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2); + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_reloc (vtxbuf, bo, vb->buffer->size - 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -479,6 +490,9 @@ struct nv50_vbo_emitctx unsigned nr_ve; unsigned vtx_dwords; unsigned vtx_max; + + float edgeflag; + unsigned ve_edgeflag; }; static INLINE void @@ -622,6 +636,9 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, if (nv50_map_vbufs(nv50) == FALSE) return FALSE; + emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in; + + emit->edgeflag = 0.5f; emit->nr_ve = 0; emit->vtx_dwords = 0; @@ -644,7 +661,8 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, desc = util_format_description(ve->src_format); assert(desc); - size = util_format_get_component_bits(ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); + size = util_format_get_component_bits( + ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); assert(ve->nr_components > 0 && ve->nr_components <= 4); @@ -686,10 +704,31 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, } emit->vtx_max = 512 / emit->vtx_dwords; + if (emit->ve_edgeflag < 16) + emit->vtx_max = 1; return TRUE; } +static INLINE void +set_edgeflag(struct nouveau_channel *chan, + struct nouveau_grobj *tesla, + struct nv50_vbo_emitctx *emit, uint32_t index) +{ + unsigned i = emit->ve_edgeflag; + + if (i < 16) { + float f = *((float *)(emit->map[i] + index * emit->stride[i])); + + if (emit->edgeflag != f) { + emit->edgeflag = f; + + BEGIN_RING(chan, tesla, 0x15e4, 1); + OUT_RING (chan, f ? 1 : 0); + } + } +} + static boolean nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) { @@ -704,6 +743,8 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx_next(chan, &emit); @@ -729,6 +770,8 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -754,6 +797,8 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -779,6 +824,8 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 0d2de17be93..183aa17f9b3 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -4,7 +4,12 @@ r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') env = env.Clone() # add the paths for r300compiler -env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa']) +env.Append(CPPPATH = [ + '#/src/mesa/drivers/dri/r300/compiler', + '#/src/gallium/winsys/drm/radeon/core', + '#/include', + '#/src/mesa', +]) r300 = env.ConvenienceLibrary( target = 'r300', diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ffe066d5369..c14414fff6b 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -27,9 +27,9 @@ static void r300_blitter_save_states(struct r300_context* r300) { - util_blitter_save_blend(r300->blitter, r300->blend_state); - util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state); - util_blitter_save_rasterizer(r300->blitter, r300->rs_state); + util_blitter_save_blend(r300->blitter, r300->blend_state.state); + util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); + util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); util_blitter_save_vertex_shader(r300->blitter, r300->vs); } diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 51fdb82ff34..92de297ef1d 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) /* Reasonable defaults */ caps->num_vert_fpus = 4; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4A54: caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5548: @@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D57: caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x554C: @@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D4A: caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5D4C: @@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D52: caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x4B48: @@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4B4C: caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5E4C: @@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5E4D: caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5954: @@ -212,6 +219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x791F: caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x793F: @@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x7942: caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x796C: @@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x796F: caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0633a8b8a72..28084864929 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -40,11 +40,18 @@ struct r300_capabilities { unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Whether or not this is R400. The differences compared to their R3xx + * cousins are: + * - Extended fragment shader registers + * - Blend LTE/GTE thresholds */ + boolean is_r400; /* Whether or not this is an RV515 or newer; R500s have many differences * that require extra consideration, compared to their R3xx cousins: * - Extra bit of width and height on texture sizes * - Blend color is split across two registers - * - Universal Shader (US) block used for fragment shaders */ + * - Blend LTE/GTE thresholds + * - Universal Shader (US) block used for fragment shaders + * - FP16 blending and multisampling */ boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d5c2d63d393..5e4f6552c36 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_blit.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_flush.h" #include "r300_query.h" #include "r300_render.h" @@ -69,11 +70,13 @@ static void r300_destroy_context(struct pipe_context* context) FREE(query); } - FREE(r300->blend_color_state); + FREE(r300->blend_color_state.state); + FREE(r300->clip_state.state); FREE(r300->rs_block); - FREE(r300->scissor_state); + FREE(r300->scissor_state.state); FREE(r300->vertex_info); - FREE(r300->viewport_state); + FREE(r300->viewport_state.state); + FREE(r300->ztop_state.state); FREE(r300); } @@ -107,6 +110,35 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } +#define R300_INIT_ATOM(atomname, atomsize) \ + r300->atomname##_state.name = #atomname; \ + r300->atomname##_state.state = NULL; \ + r300->atomname##_state.size = atomsize; \ + r300->atomname##_state.emit = r300_emit_##atomname##_state; \ + r300->atomname##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname##_state); + +static void r300_setup_atoms(struct r300_context* r300) +{ + /* Create the actual atom list. + * + * Each atom is examined and emitted in the order it appears here, which + * can affect performance and conformance if not handled with care. + * + * Some atoms never change size, others change every emit. This is just + * an upper bound on each atom, to keep the emission machinery from + * underallocating space. */ + make_empty_list(&r300->atom_list); + R300_INIT_ATOM(ztop, 2); + R300_INIT_ATOM(blend, 8); + R300_INIT_ATOM(blend_color, 3); + R300_INIT_ATOM(clip, 29); + R300_INIT_ATOM(dsa, 8); + R300_INIT_ATOM(rs, 22); + R300_INIT_ATOM(scissor, 3); + R300_INIT_ATOM(viewport, 9); +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, struct radeon_winsys* radeon_winsys) { @@ -155,11 +187,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, r300_shader_key_compare); - r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); + r300_setup_atoms(r300); + + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); + r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); - r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); + r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); + r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 232530b7dc4..682b9179c83 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,9 +30,28 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +struct r300_context; + struct r300_fragment_shader; struct r300_vertex_shader; +struct r300_atom { + /* List pointers. */ + struct r300_atom *prev, *next; + /* Name, for debugging. */ + const char* name; + /* Opaque state. */ + void* state; + /* Emit the state to the context. */ + void (*emit)(struct r300_context*, void*); + /* Upper bound on number of dwords to emit. */ + unsigned size; + /* Whether this atom should be emitted. */ + boolean dirty; + /* Another dirty flag that is never automatically cleared. */ + boolean always_dirty; +}; + struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ @@ -62,11 +81,6 @@ struct r300_rs_state { /* Draw-specific rasterizer state */ struct pipe_rasterizer_state rs; - /* Whether or not to enable the VTE. This is referenced at the very - * last moment during emission of VTE state, to decide whether or not - * the VTE should be used for transformation. */ - boolean enable_vte; - uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ @@ -102,19 +116,6 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_scissor_regs { - uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ - - /* Whether everything is culled by scissoring. */ - boolean empty_area; -}; - -struct r300_scissor_state { - struct r300_scissor_regs framebuffer; - struct r300_scissor_regs scissor; -}; - struct r300_texture_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ @@ -135,24 +136,17 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_BLEND 0x00000001 -#define R300_NEW_BLEND_COLOR 0x00000002 -#define R300_NEW_CLIP 0x00000004 -#define R300_NEW_DSA 0x00000008 #define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_RASTERIZER 0x00000080 #define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 #define R300_ANY_NEW_SAMPLERS 0x0001fe00 -#define R300_NEW_SCISSOR 0x00020000 #define R300_NEW_TEXTURE 0x00040000 #define R300_ANY_NEW_TEXTURES 0x03fc0000 #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 -#define R300_NEW_VIEWPORT 0x20000000 #define R300_NEW_QUERY 0x40000000 #define R300_NEW_KITCHEN_SINK 0x7fffffff @@ -194,6 +188,12 @@ struct r300_query { struct r300_query* next; }; +enum r300_buffer_tiling { + R300_BUFFER_LINEAR = 0, + R300_BUFFER_TILED, + R300_BUFFER_SQUARETILED +}; + struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -230,6 +230,9 @@ struct r300_texture { /* Registers carrying texture format data. */ struct r300_texture_state state; + + /* Buffer tiling */ + enum r300_buffer_tiling microtile, macrotile; }; struct r300_vertex_info { @@ -273,38 +276,40 @@ struct r300_context { struct r300_vertex_info* vertex_info; /* Various CSO state objects. */ + /* Beginning of atom list. */ + struct r300_atom atom_list; /* Blend state. */ - struct r300_blend_state* blend_state; + struct r300_atom blend_state; /* Blend color state. */ - struct r300_blend_color_state* blend_color_state; + struct r300_atom blend_color_state; /* User clip planes. */ - struct pipe_clip_state clip_state; + struct r300_atom clip_state; /* Shader constants. */ struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES]; /* Depth, stencil, and alpha state. */ - struct r300_dsa_state* dsa_state; + struct r300_atom dsa_state; /* Fragment shader. */ struct r300_fragment_shader* fs; /* Framebuffer state. We currently don't need our own version of this. */ struct pipe_framebuffer_state framebuffer_state; /* Rasterizer state. */ - struct r300_rs_state* rs_state; + struct r300_atom rs_state; /* RS block state. */ struct r300_rs_block* rs_block; /* Sampler states. */ struct r300_sampler_state* sampler_states[8]; int sampler_count; /* Scissor state. */ - struct r300_scissor_state* scissor_state; + struct r300_atom scissor_state; /* Texture states. */ struct r300_texture* textures[8]; int texture_count; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ - struct r300_viewport_state* viewport_state; + struct r300_atom viewport_state; /* ZTOP state. */ - struct r300_ztop_state ztop_state; + struct r300_atom ztop_state; /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -317,6 +322,8 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + /* Whether the TCL engine should be in bypass mode. */ + boolean tcl_bypass; /** Combination of DBG_xxx flags */ unsigned debug; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d142fee0502..151f72b0fe4 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -52,7 +52,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0; + int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ assert(cs_winsys->check_cs(cs_winsys, (size))) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 199ce3a945d..9f93327e598 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_simple_list.h" #include "r300_context.h" #include "r300_cs.h" @@ -36,11 +37,13 @@ #include "r300_texture.h" #include "r300_vs.h" -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend) +void r300_emit_blend_state(struct r300_context* r300, void* state) { + struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); + BEGIN_CS(8); + OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (r300->framebuffer_state.nr_cbufs) { OUT_CS(blend->blend_control); @@ -52,14 +55,13 @@ void r300_emit_blend_state(struct r300_context* r300, OUT_CS(0); /* XXX also disable fastfill here once it's supported */ } - OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); END_CS; } -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc) +void r300_emit_blend_color_state(struct r300_context* r300, void* state) { + struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -76,9 +78,9 @@ void r300_emit_blend_color_state(struct r300_context* r300, } } -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip) +void r300_emit_clip_state(struct r300_context* r300, void* state) { + struct pipe_clip_state* clip = (struct pipe_clip_state*)state; int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -106,13 +108,13 @@ void r300_emit_clip_state(struct r300_context* r300, } -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa) +void r300_emit_dsa_state(struct r300_context* r300, void* state) { + struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8); + BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); /* not needed since we use the 8bit alpha ref */ @@ -121,10 +123,16 @@ void r300_emit_dsa_state(struct r300_context* r300, }*/ OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - OUT_CS(dsa->z_buffer_control); - OUT_CS(dsa->z_stencil_control); + + if (r300->framebuffer_state.zsbuf) { + OUT_CS(dsa->z_buffer_control); + OUT_CS(dsa->z_stencil_control); + } else { + OUT_CS(0); + OUT_CS(0); + } + OUT_CS(dsa->stencil_ref_mask); - OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { @@ -138,6 +146,8 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -160,11 +170,31 @@ static const float * get_shader_constant( /* Texture compare-fail value. */ /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient, - * this is always (0,0,0,0). */ + * this is always (0,0,0,0), right? */ case RC_STATE_SHADOW_AMBIENT: vec[3] = 0; break; + case RC_STATE_R300_VIEWPORT_SCALE: + if (r300->tcl_bypass) { + vec[0] = 1; + vec[1] = 1; + vec[2] = 1; + } else { + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; + } + break; + + case RC_STATE_R300_VIEWPORT_OFFSET: + if (!r300->tcl_bypass) { + vec[0] = viewport->xoffset; + vec[1] = viewport->yoffset; + vec[2] = viewport->zoffset; + } + break; + default: debug_printf("r300: Implementation error: " "Unknown RC_CONSTANT type %d\n", constant->u.State[0]); @@ -283,6 +313,22 @@ void r300_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } +static void r300_emit_fragment_depth_config(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + CS_LOCALS(r300); + + BEGIN_CS(4); + if (r300_fragment_shader_writes_depth(fs)) { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SHADER); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W24 | R300_W_SRC_US); + } else { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SCAN); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0 | R300_W_SRC_US); + } + END_CS; +} + void r500_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code) { @@ -374,8 +420,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | - r300_translate_colorformat(tex->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); + r300_translate_colorformat(tex->tex.format) | + R300_COLOR_TILE(tex->macrotile) | + R300_COLOR_MICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(surf->format)); @@ -398,8 +446,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, - RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | + R300_DEPTHMACROTILE(tex->macrotile) | + R300_DEPTHMICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); } END_CS; @@ -531,8 +581,9 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_finish(r300, query); } -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) +void r300_emit_rs_state(struct r300_context* r300, void* state) { + struct r300_rs_state* rs = (struct r300_rs_state*)state; CS_LOCALS(r300); BEGIN_CS(22); @@ -595,26 +646,47 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -static void r300_emit_scissor_regs(struct r300_context* r300, - struct r300_scissor_regs* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + unsigned minx, miny, maxx, maxy; + uint32_t top_left, bottom_right; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; CS_LOCALS(r300); - BEGIN_CS(3); - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->top_left); - OUT_CS(scissor->bottom_right); - END_CS; -} + minx = miny = 0; + maxx = r300->framebuffer_state.width; + maxy = r300->framebuffer_state.height; -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor) -{ - if (r300->rs_state->rs.scissor) { - r300_emit_scissor_regs(r300, &scissor->scissor); + if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { + minx = MAX2(minx, scissor->minx); + miny = MAX2(miny, scissor->miny); + maxx = MIN2(maxx, scissor->maxx); + maxy = MIN2(maxy, scissor->maxy); + } + + if (r300screen->caps->is_r500) { + top_left = + (minx << R300_SCISSORS_X_SHIFT) | + (miny << R300_SCISSORS_Y_SHIFT); + bottom_right = + ((maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((maxy - 1) << R300_SCISSORS_Y_SHIFT); } else { - r300_emit_scissor_regs(r300, &scissor->framebuffer); + /* Offset of 1440 in non-R500 chipsets. */ + top_left = + ((minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((miny + 1440) << R300_SCISSORS_Y_SHIFT); + bottom_right = + (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(top_left); + OUT_CS(bottom_right); + END_CS; } void r300_emit_texture(struct r300_context* r300, @@ -650,8 +722,10 @@ void r300_emit_texture(struct r300_context* r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_RELOC(tex->buffer, + R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile), + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); END_CS; } @@ -717,32 +791,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -#if 0 -void r300_emit_draw_packet(struct r300_context* r300) -{ - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " - "vertex size %d\n", r300->vbo, - r300->vertex_info->vinfo.size); - /* Set the pointer to our vertex buffer. The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [offset into BO] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info->vinfo.size | - (r300->vertex_info->vinfo.size << 8)); - OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; -} -#endif - void r300_emit_vertex_format_state(struct r300_context* r300) { int i; @@ -867,26 +915,27 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport) +void r300_emit_viewport_state(struct r300_context* r300, void* state) { + struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - BEGIN_CS(9); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - - if (r300->rs_state->enable_vte) { - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - } else { + if (r300->tcl_bypass) { + BEGIN_CS(2); OUT_CS_REG(R300_VAP_VTE_CNTL, 0); + END_CS; + } else { + BEGIN_CS(9); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } - END_CS; } void r300_emit_texture_count(struct r300_context* r300) @@ -910,6 +959,16 @@ void r300_emit_texture_count(struct r300_context* r300) } +void r300_emit_ztop_state(struct r300_context* r300, void* state) +{ + struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); + END_CS; +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); @@ -933,18 +992,24 @@ void r300_emit_dirty_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_texture* tex; - int i, dirty_tex = 0; + struct r300_atom* atom; + unsigned i, dwords = 1024; + int dirty_tex = 0; boolean invalid = FALSE; - if (!(r300->dirty_state)) { - return; + /* Check the required number of dwords against the space remaining in the + * current CS object. If we need more, then flush. */ + + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + dwords += atom->size; + } } - /* Check size of CS. */ - /* Make sure we have at least 8*1024 spare dwords. */ + /* Make sure we have at least 2*1024 spare dwords. */ /* XXX It would be nice to know the number of dwords we really need to * XXX emit. */ - if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + if (!r300->winsys->check_cs(r300->winsys, dwords)) { r300->context.flush(&r300->context, 0, NULL); } @@ -984,10 +1049,12 @@ validate: } } /* ...occlusion query buffer... */ - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; + if (r300->dirty_state & R300_NEW_QUERY) { + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } } /* ...and vertex buffer. */ if (r300->vbo) { @@ -997,7 +1064,7 @@ validate: goto validate; } } else { - // debug_printf("No VBO while emitting dirty state!\n"); + /* debug_printf("No VBO while emitting dirty state!\n"); */ } if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); @@ -1015,27 +1082,15 @@ validate: r300->dirty_state &= ~R300_NEW_QUERY; } - if (r300->dirty_state & R300_NEW_BLEND) { - r300_emit_blend_state(r300, r300->blend_state); - r300->dirty_state &= ~R300_NEW_BLEND; - } - - if (r300->dirty_state & R300_NEW_BLEND_COLOR) { - r300_emit_blend_color_state(r300, r300->blend_color_state); - r300->dirty_state &= ~R300_NEW_BLEND_COLOR; - } - - if (r300->dirty_state & R300_NEW_CLIP) { - r300_emit_clip_state(r300, &r300->clip_state); - r300->dirty_state &= ~R300_NEW_CLIP; - } - - if (r300->dirty_state & R300_NEW_DSA) { - r300_emit_dsa_state(r300, r300->dsa_state); - r300->dirty_state &= ~R300_NEW_DSA; + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + atom->emit(r300, atom->state); + atom->dirty = FALSE; + } } if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { + r300_emit_fragment_depth_config(r300, r300->fs); if (r300screen->caps->is_r500) { r500_emit_fragment_program_code(r300, &r300->fs->shader->code); } else { @@ -1060,21 +1115,11 @@ validate: r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; } - if (r300->dirty_state & R300_NEW_RASTERIZER) { - r300_emit_rs_state(r300, r300->rs_state); - r300->dirty_state &= ~R300_NEW_RASTERIZER; - } - if (r300->dirty_state & R300_NEW_RS_BLOCK) { r300_emit_rs_block_state(r300, r300->rs_block); r300->dirty_state &= ~R300_NEW_RS_BLOCK; } - if (r300->dirty_state & R300_NEW_SCISSOR) { - r300_emit_scissor_state(r300, r300->scissor_state); - r300->dirty_state &= ~R300_NEW_SCISSOR; - } - /* Samplers and textures are tracked separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { @@ -1096,11 +1141,6 @@ validate: r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); } - if (r300->dirty_state & R300_NEW_VIEWPORT) { - r300_emit_viewport_state(r300, r300->viewport_state); - r300->dirty_state &= ~R300_NEW_VIEWPORT; - } - if (dirty_tex) { r300_flush_textures(r300); } @@ -1129,7 +1169,7 @@ validate: */ /* Finally, emit the VBO. */ - //r300_emit_vertex_buffer(r300); + /* r300_emit_vertex_buffer(r300); */ r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 3797d3d332a..05a6bfeae86 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,17 +31,13 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend); +void r300_emit_blend_state(struct r300_context* r300, void* state); -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc); +void r300_emit_blend_color_state(struct r300_context* r300, void* state); -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip); +void r300_emit_clip_state(struct r300_context* r300, void* state); -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa); +void r300_emit_dsa_state(struct r300_context* r300, void* state); void r300_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code); @@ -63,13 +59,12 @@ void r300_emit_query_begin(struct r300_context* r300, void r300_emit_query_end(struct r300_context* r300); -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); +void r300_emit_rs_state(struct r300_context* r300, void* state); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor); +void r300_emit_scissor_state(struct r300_context* r300, void* state); void r300_emit_texture(struct r300_context* r300, struct r300_sampler_state* sampler, @@ -89,11 +84,12 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport); +void r300_emit_viewport_state(struct r300_context* r300, void* state); void r300_emit_texture_count(struct r300_context* r300); +void r300_emit_ztop_state(struct r300_context* r300, void* state); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc4..59819cb1061 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -37,8 +37,10 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_query *query; + struct r300_atom *atom; CS_LOCALS(r300); + (void) cs_count; /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -54,7 +56,15 @@ static void r300_flush(struct pipe_context* pipe, r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; + + /* New kitchen sink, baby. */ + foreach(atom, &r300->atom_list) { + if (atom->state) { + atom->dirty = TRUE; + } + } } + /* reset flushed query */ foreach(query, &r300->query_list) { query->flushed = TRUE; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4e1b61ca409..60ea9c171d5 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -63,6 +63,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, fs_inputs->fog = i; break; + case TGSI_SEMANTIC_POSITION: + assert(index == 0); + fs_inputs->wpos = i; + break; + default: assert(0); } @@ -114,6 +119,9 @@ static void allocate_hardware_inputs( if (inputs->fog != ATTR_UNUSED) { allocate(mydata, inputs->fog, reg++); } + if (inputs->wpos != ATTR_UNUSED) { + allocate(mydata, inputs->wpos, reg++); + } } static void get_compare_state( @@ -144,6 +152,7 @@ static void r300_translate_fragment_shader( struct r300_fragment_shader* fs = r300->fs; struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; + int wpos = fs->inputs.wpos; /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); @@ -171,6 +180,18 @@ static void r300_translate_fragment_shader( fs->shadow_samplers = compiler.Base.Program.ShadowSamplers; + /** + * Transform the program to support WPOS. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. */ + if (wpos != ATTR_UNUSED) { + /* Moving the input to some other reg is not really necessary. */ + rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE); + } + /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); if (compiler.Base.Error) { diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index d8d08fbe264..361813891fb 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_TX_OFFSET_5 0x4554 #define R300_TX_OFFSET_6 0x4558 #define R300_TX_OFFSET_7 0x455C - /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) -# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) +# define R300_TXO_MACRO_TILE_TILED (1 << 2) +# define R300_TXO_MACRO_TILE(x) ((x) << 2) # define R300_TXO_MICRO_TILE_LINEAR (0 << 3) -# define R300_TXO_MICRO_TILE (1 << 3) -# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE_TILED (1 << 3) +# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE(x) ((x) << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 - /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 @@ -2186,6 +2188,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) +# define R500_SRC_ALPHA_0_NO_READ (1 << 30) +# define R500_SRC_ALPHA_1_NO_READ (1 << 31) /* the following are shared between CBLEND and ABLEND */ # define R300_FCN_MASK (3 << 12) @@ -2281,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_TILE(x) ((x) << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) # define R300_COLOR_MICROTILE_ENABLE (1 << 17) # define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_MICROTILE(x) ((x) << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) @@ -2542,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DEPTHPITCH_MASK 0x00003FFC # define R300_DEPTHMACROTILE_DISABLE (0 << 16) # define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMACROTILE(x) ((x) << 16) # define R300_DEPTHMICROTILE_LINEAR (0 << 17) # define R300_DEPTHMICROTILE_TILED (1 << 17) # define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHMICROTILE(x) ((x) << 17) # define R300_DEPTHENDIAN_NO_SWAP (0 << 18) # define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) # define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) @@ -2638,7 +2646,7 @@ enum { VE_COND_MUX_GTE = 25, VE_SET_GREATER_THAN = 26, VE_SET_EQUAL = 27, - VE_SET_NOT_EQUAL = 28, + VE_SET_NOT_EQUAL = 28 }; enum { @@ -2672,20 +2680,20 @@ enum { ME_PRED_SET_CLR = 25, ME_PRED_SET_INV = 26, ME_PRED_SET_POP = 27, - ME_PRED_SET_RESTORE = 28, + ME_PRED_SET_RESTORE = 28 }; enum { /* R3XX */ PVS_MACRO_OP_2CLK_MADD = 0, - PVS_MACRO_OP_2CLK_M2X_ADD = 1, + PVS_MACRO_OP_2CLK_M2X_ADD = 1 }; enum { PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */ PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */ PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */ - PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */ + PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */ }; enum { @@ -2694,7 +2702,7 @@ enum { PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */ PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */ PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */ - PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */ + PVS_DST_REG_INPUT = 5 /* Output Memory & Replicate X to all channels */ }; enum { @@ -2703,7 +2711,7 @@ enum { PVS_SRC_SELECT_Z = 2, /* Select Z Component */ PVS_SRC_SELECT_W = 3, /* Select W Component */ PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */ - PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */ + PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */ }; /* PVS Opcode & Destination Operand Description */ @@ -2742,7 +2750,7 @@ enum { PVS_DST_ADDR_SEL_MASK = 0x3, PVS_DST_ADDR_SEL_SHIFT = 29, PVS_DST_ADDR_MODE_0_MASK = 0x1, - PVS_DST_ADDR_MODE_0_SHIFT = 31, + PVS_DST_ADDR_MODE_0_SHIFT = 31 }; /* PVS Source Operand Description */ @@ -2777,7 +2785,7 @@ enum { PVS_SRC_ADDR_SEL_MASK = 0x3, PVS_SRC_ADDR_SEL_SHIFT = 29, PVS_SRC_ADDR_MODE_1_MASK = 0x0, - PVS_SRC_ADDR_MODE_1_SHIFT = 32, + PVS_SRC_ADDR_MODE_1_SHIFT = 32 }; /*\}*/ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2d70ec2ac94..710d850163f 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,6 +26,8 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "indices/u_indices.h" + #include "pipe/p_inlines.h" #include "util/u_memory.h" @@ -69,16 +71,11 @@ uint32_t r300_translate_primitive(unsigned prim) } } -static boolean r300_nothing_to_draw(struct r300_context *r300) -{ - return r300->rs_state->rs.scissor && - r300->scissor_state->scissor.empty_area; -} - static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, unsigned mode) { - uint32_t color_control = r300->rs_state->color_control; + struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state; + uint32_t color_control = rs->color_control; /* By default (see r300_state.c:r300_create_rs_state) color_control is * initialized to provoking the first vertex. @@ -98,7 +95,7 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, * ~ C. */ - if (r300->rs_state->rs.flatshade_first) { + if (rs->rs.flatshade_first) { switch (mode) { case PIPE_PRIM_TRIANGLE_FAN: color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND; @@ -119,6 +116,44 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +static void r300_emit_draw_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer; + unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float); + unsigned i; + uint32_t* map; + CS_LOCALS(r300); + + map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo, + start * vertex_size, count * vertex_size, + PIPE_BUFFER_USAGE_CPU_READ); + + BEGIN_CS(10 + count * vertex_size); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + r300_translate_primitive(mode)); + //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size); + for (i = 0; i < count * vertex_size; i++) { + if (i % vertex_size == 0) { + //debug_printf("r300: -- vert --\n"); + } + //debug_printf("r300: 0x%08x\n", *map); + OUT_CS(*map); + map++; + } + END_CS; + + pipe_buffer_unmap(r300->context.screen, vbo); +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -212,43 +247,84 @@ validate: return TRUE; } +static struct pipe_buffer* r300_translate_elts(struct r300_context* r300, + struct pipe_buffer* elts, + unsigned* size, + unsigned* mode, + unsigned* count) +{ + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* new_elts; + void *in_map, *out_map; + unsigned out_prim, out_index_size, out_nr; + u_translate_func out_translate; + + (void)u_index_translator(~0, *mode, *size, *count, PV_LAST, PV_LAST, + &out_prim, &out_index_size, &out_nr, &out_translate); + + new_elts = screen->buffer_create(screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + out_index_size * out_nr); + + in_map = pipe_buffer_map(screen, elts, PIPE_BUFFER_USAGE_CPU_READ); + out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE); + + out_translate(in_map, *count, out_map); + + pipe_buffer_unmap(screen, elts); + pipe_buffer_unmap(screen, new_elts); + + *size = out_index_size; + *mode = out_prim; + *count = out_nr; + + return new_elts; +} + /* This is the fast-path drawing & emission for HW TCL. */ -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: use aux/indices functions to split this into smaller + * primitives. + */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; + } + + if (indexSize == 1) { + indexBuffer = r300_translate_elts(r300, indexBuffer, + &indexSize, &mode, &count); } if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return FALSE; + goto cleanup; } if (!r300->winsys->validate(r300->winsys)) { - return FALSE; + goto cleanup; } r300_emit_dirty_state(r300); @@ -258,49 +334,52 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); - return TRUE; +cleanup: + if (indexBuffer != orgIndexBuffer) { + pipe->screen->buffer_destroy(indexBuffer); + } } /* Simple helpers for context setup. Should probably be moved to util. */ -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count) +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) { - return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); } -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: driver needs to handle this -- use the functions in + * aux/indices to split this into several smaller primitives. + */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; } r300_emit_dirty_state(r300); - r300_emit_aos(r300, start); - - r300_emit_draw_arrays(r300, mode, count); - - return TRUE; + if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) { + r300_emit_draw_immediate(r300, mode, start, count); + } else { + r300_emit_aos(r300, start); + r300_emit_draw_arrays(r300, mode, count); + } } /**************************************************************************** @@ -309,7 +388,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, ***************************************************************************/ /* SW TCL arrays, using Draw. */ -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, +void r300_swtcl_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) @@ -318,11 +397,7 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, int i; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -335,8 +410,9 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, draw_set_mapped_element_buffer(r300->draw, 0, NULL); draw_set_mapped_constant_buffer(r300->draw, - r300->shader_constants[PIPE_SHADER_VERTEX].constants, - r300->shader_constants[PIPE_SHADER_VERTEX].count * + PIPE_SHADER_VERTEX, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); draw_arrays(r300->draw, mode, start, count); @@ -345,12 +421,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } - - return TRUE; } /* SW TCL elements, using Draw. */ -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, unsigned minIndex, @@ -361,13 +435,10 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); int i; + void* indices; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -377,12 +448,13 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - void* indices = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(r300->draw, indexSize, minIndex, maxIndex, indices); draw_set_mapped_constant_buffer(r300->draw, + PIPE_SHADER_VERTEX, r300->shader_constants[PIPE_SHADER_VERTEX].constants, r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); @@ -397,8 +469,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(r300->draw, 0, start, start + count - 1, NULL); - - return TRUE; } /* Object for rendering using Draw. */ @@ -474,7 +544,7 @@ static void* r300_render_map_vertices(struct vbuf_render* render) r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo, PIPE_BUFFER_USAGE_CPU_WRITE); - return (r300render->vbo_ptr + r300render->vbo_offset); + return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); } static void r300_render_unmap_vertices(struct vbuf_render* render, diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index da83069083d..27b5e6a9630 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -25,35 +25,35 @@ uint32_t r300_translate_primitive(unsigned prim); -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count); - -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count); - -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count); + +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count); + +void r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count); + +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); #endif /* R300_RENDER_H */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 2a8667d4835..287664b1d20 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -83,6 +83,7 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: /* XXX I'm told this goes up to 16 */ return 8; case PIPE_CAP_NPOT_TEXTURES: @@ -143,9 +144,11 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case PIPE_CAP_SM3: - return 1; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 8; + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } default: debug_printf("r300: Implementation error: Bad param %d\n", param); diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h index 85184e2cfd7..6796841b29b 100644 --- a/src/gallium/drivers/r300/r300_shader_semantics.h +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -40,6 +40,7 @@ struct r300_shader_semantics { int bcolor[ATTR_COLOR_COUNT]; int generic[ATTR_GENERIC_COUNT]; int fog; + int wpos; }; static INLINE void r300_shader_semantics_reset( @@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset( info->pos = ATTR_UNUSED; info->psize = ATTR_UNUSED; info->fog = ATTR_UNUSED; + info->wpos = ATTR_UNUSED; for (i = 0; i < ATTR_COLOR_COUNT; i++) { info->color[i] = ATTR_UNUSED; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 541b0abc9c5..60ad763cf48 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2009 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,6 +42,120 @@ /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ +static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 0, and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 1, and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + /* Create a new blend state based on the CSO blend state. * * This encompasses alpha blending, logic/raster ops, and blend dithering. */ @@ -66,7 +181,11 @@ static void* r300_create_blend_state(struct pipe_context* pipe, ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); - /* optimization: some operations do not require the destination color */ + /* Optimization: some operations do not require the destination color. + * + * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled, + * otherwise blending gives incorrect results. It seems to be + * a hardware bug. */ if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX || dstRGB != PIPE_BLENDFACTOR_ZERO || @@ -78,11 +197,81 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcA == PIPE_BLENDFACTOR_DST_COLOR || srcA == PIPE_BLENDFACTOR_DST_ALPHA || srcA == PIPE_BLENDFACTOR_INV_DST_COLOR || - srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA) + srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { + /* Enable reading from the colorbuffer. */ blend->blend_control |= R300_READ_ENABLE; - /* XXX implement the optimization with DISCARD_SRC_PIXELS*/ - /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */ + if (r300_screen(r300_context(pipe)->context.screen)->caps->is_r500) { + /* Optimization: Depending on incoming pixels, we can + * conditionally disable the reading in hardware... */ + if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN && + eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) { + /* Disable reading if SRC_ALPHA == 0. */ + if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_0_NO_READ; + } + + /* Disable reading if SRC_ALPHA == 1. */ + if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_1_NO_READ; + } + } + } + } + + /* Optimization: discard pixels which don't change the colorbuffer. + * + * The code below is non-trivial and some math is involved. + * + * Discarding pixels must be disabled when FP16 AA is enabled. + * This is a hardware bug. Also, this implementation wouldn't work + * with FP blending enabled and equation clamping disabled. + * + * Equations other than ADD are rarely used and therefore won't be + * optimized. */ + if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) && + (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) { + /* ADD: X+Y + * REVERSE_SUBTRACT: Y-X + * + * The idea is: + * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1, + * then CB will not be changed. + * + * Given the srcFactor and dstFactor variables, we can derive + * what src and dst should be equal to and discard appropriate + * pixels. + */ + if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; + } else if (blend_discard_if_src_alpha_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; + } else if (blend_discard_if_src_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; + } else if (blend_discard_if_src_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; + } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0; + } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1; + } + } /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { @@ -128,8 +317,8 @@ static void r300_bind_blend_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->blend_state = (struct r300_blend_state*)state; - r300->dirty_state |= R300_NEW_BLEND; + r300->blend_state.state = state; + r300->blend_state.dirty = TRUE; } /* Free blend state. */ @@ -151,20 +340,24 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); + struct r300_blend_color_state* state = + (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); - r300->blend_color_state->blend_color = uc.ui; + state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ - r300->blend_color_state->blend_color_red_alpha = + state->blend_color_red_alpha = float_to_fixed10(color->color[0]) | (float_to_fixed10(color->color[3]) << 16); - r300->blend_color_state->blend_color_green_blue = + state->blend_color_green_blue = float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); - r300->dirty_state |= R300_NEW_BLEND_COLOR; + r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2; + r300->blend_color_state.dirty = TRUE; } static void r300_set_clip_state(struct pipe_context* pipe, @@ -173,12 +366,15 @@ static void r300_set_clip_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (r300_screen(pipe->screen)->caps->has_tcl) { - r300->clip_state = *state; - r300->dirty_state |= R300_NEW_CLIP; + memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); + r300->clip_state.size = 29; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); + r300->clip_state.size = 2; } + + r300->clip_state.dirty = TRUE; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -271,9 +467,11 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); - r300->dsa_state = (struct r300_dsa_state*)state; - r300->dirty_state |= R300_NEW_DSA; + r300->dsa_state.state = state; + r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; + r300->dsa_state.dirty = TRUE; } /* Free DSA state. */ @@ -283,37 +481,11 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_scissor_regs(const struct pipe_scissor_state* state, - struct r300_scissor_regs *scissor, - boolean is_r500) -{ - if (is_r500) { - scissor->top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - scissor->top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } - - scissor->empty_area = state->minx >= state->maxx || - state->miny >= state->maxy; -} - static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct pipe_scissor_state scissor; if (r300->draw) { draw_flush(r300->draw); @@ -321,18 +493,12 @@ static void r300->framebuffer_state = *state; - scissor.minx = scissor.miny = 0; - scissor.maxx = state->width; - scissor.maxy = state->height; - r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer, - r300_screen(r300->context.screen)->caps->is_r500); - /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || !r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; - r300->dirty_state |= R300_NEW_BLEND; + + r300->blend_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; + r300->scissor_state.dirty = TRUE; } /* Create fragment shader state. */ @@ -367,6 +533,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; r300_pick_fragment_shader(r300); + if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + } + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -407,8 +577,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* Copy rasterizer state for Draw. */ rs->rs = *state; - rs->enable_vte = !state->bypass_vs_clip_and_viewport; - #ifdef PIPE_ARCH_LITTLE_ENDIAN rs->vap_control_status = R300_VC_NO_SWAP; #else @@ -524,12 +692,23 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->rs_state = rs; + if (rs) { + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + } else { + r300->tcl_bypass = FALSE; + } + + r300->rs_state.state = rs; + r300->rs_state.dirty = TRUE; + /* XXX Why is this still needed, dammit!? */ + r300->scissor_state.dirty = TRUE; + r300->viewport_state.dirty = TRUE; + /* XXX Clean these up when we move to atom emits */ - r300->dirty_state |= R300_NEW_RASTERIZER; r300->dirty_state |= R300_NEW_RS_BLOCK; - r300->dirty_state |= R300_NEW_SCISSOR; - r300->dirty_state |= R300_NEW_VIEWPORT; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } /* Free rasterizer state. */ @@ -556,7 +735,8 @@ static void* sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, state->mag_img_filter, - state->min_mip_filter); + state->min_mip_filter, + state->max_anisotropy > 1.0); /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ /* We must pass these to the emit function to clamp them properly. */ @@ -664,49 +844,51 @@ static void r300_set_scissor_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300_set_scissor_regs(state, &r300->scissor_state->scissor, - r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->scissor_state.state, state, + sizeof(struct pipe_scissor_state)); - /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } + r300->scissor_state.dirty = TRUE; } static void r300_set_viewport_state(struct pipe_context* pipe, const struct pipe_viewport_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; /* Do the transform in HW. */ - r300->viewport_state->vte_control = R300_VTX_W0_FMT; + viewport->vte_control = R300_VTX_W0_FMT; if (state->scale[0] != 1.0f) { - r300->viewport_state->xscale = state->scale[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; + viewport->xscale = state->scale[0]; + viewport->vte_control |= R300_VPORT_X_SCALE_ENA; } if (state->scale[1] != 1.0f) { - r300->viewport_state->yscale = state->scale[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; + viewport->yscale = state->scale[1]; + viewport->vte_control |= R300_VPORT_Y_SCALE_ENA; } if (state->scale[2] != 1.0f) { - r300->viewport_state->zscale = state->scale[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; + viewport->zscale = state->scale[2]; + viewport->vte_control |= R300_VPORT_Z_SCALE_ENA; } if (state->translate[0] != 0.0f) { - r300->viewport_state->xoffset = state->translate[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; + viewport->xoffset = state->translate[0]; + viewport->vte_control |= R300_VPORT_X_OFFSET_ENA; } if (state->translate[1] != 0.0f) { - r300->viewport_state->yoffset = state->translate[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; + viewport->yoffset = state->translate[1]; + viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA; } if (state->translate[2] != 0.0f) { - r300->viewport_state->zoffset = state->translate[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; + viewport->zoffset = state->translate[2]; + viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA; } - r300->dirty_state |= R300_NEW_VIEWPORT; + r300->viewport_state.dirty = TRUE; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } static void r300_set_vertex_buffers(struct pipe_context* pipe, @@ -778,7 +960,13 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) } r300->vs = vs; - r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; + if (r300->fs) { + r300_vertex_shader_setup_wpos(r300); + } + + r300->dirty_state |= + R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS | + R300_NEW_VERTEX_FORMAT; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 29bc701a86e..192846411ba 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -71,9 +71,9 @@ static void r300_draw_emit_attrib(struct r300_context* r300, struct tgsi_shader_info* info = &r300->vs->info; int output; - output = draw_find_vs_output(r300->draw, - info->output_semantic_name[index], - info->output_semantic_index[index]); + output = draw_find_shader_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); } @@ -139,10 +139,10 @@ static void r300_vertex_psc(struct r300_context* r300) /* If TCL is bypassed, map vertex streams to equivalent VS output * locations. */ - if (r300->rs_state->enable_vte) { - stream_tab = identity; - } else { + if (r300->tcl_bypass) { stream_tab = r300->vs->stream_loc_notcl; + } else { + stream_tab = identity; } /* Vertex shaders have no semantics on their inputs, @@ -333,6 +333,8 @@ static void r300_update_rs_block(struct r300_context* r300, void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; if (r300_screen(r300->context.screen)->caps->is_r500) { rX00_rs_col = r500_rs_col; @@ -348,7 +350,7 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ rX00_rs_col(rs, col_count, i, FALSE); @@ -410,6 +412,16 @@ static void r300_update_rs_block(struct r300_context* r300, } } + /* Rasterize WPOS. */ + /* If the FS doesn't need it, it's not written by the VS. */ + if (fs_inputs->wpos != ATTR_UNUSED) { + rX00_rs_tex(rs, tex_count, tex_count, FALSE); + rX00_rs_tex_write(rs, tex_count, fp_offset); + + fp_offset++; + tex_count++; + } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { rX00_rs_col(rs, 0, 0, TRUE); @@ -496,7 +508,8 @@ static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) static void r300_update_ztop(struct r300_context* r300) { - r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; /* This is important enough that I felt it warranted a comment. * @@ -518,31 +531,37 @@ static void r300_update_ztop(struct r300_context* r300) * 5) Depth writes in fragment shader * 6) Outstanding occlusion queries * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * * ~C. */ /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */ - r300->fs->info.uses_kill)) { /* (2) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */ + r300->fs->info.uses_kill)) { /* (2) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else { + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } + + r300->ztop_state.dirty = TRUE; } void r300_update_derived_state(struct r300_context* r300) { + /* XXX */ if (r300->dirty_state & (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | - R300_NEW_VERTEX_FORMAT)) { + R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) { r300_update_derived_shader_state(r300); } - if (r300->dirty_state & - (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { - r300_update_ztop(r300); - } + r300_update_ztop(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index dbe42edd910..35be00e1b01 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -257,38 +257,37 @@ static INLINE uint32_t r300_translate_wrap(int wrap) } } -static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) +static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip, + int is_anisotropic) { uint32_t retval = 0; - switch (min) { + if (is_anisotropic) + retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO; + else { + switch (min) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MIN_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MIN_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MIN_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", min); assert(0); break; - } - switch (mag) { + } + switch (mag) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MAG_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MAG_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MAG_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", mag); assert(0); break; + } } switch (mip) { case PIPE_TEX_MIPFILTER_NONE: diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index bcd4c030f9c..b0f309695c9 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(20 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(16 + (caps->has_tcl ? 2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -66,8 +66,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); - OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); - OUT_CS_REG(R300_US_W_FMT, 0x0); /*** VAP ***/ /* Sign/normalize control */ @@ -117,10 +115,12 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - if (caps->is_r500) { - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); + + if (caps->family >= CHIP_FAMILY_RV350) { + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 9a96206a4dc..a9bbdd56d84 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -30,6 +30,18 @@ #include "r300_texture.h" #include "r300_screen.h" +#define TILE_WIDTH 0 +#define TILE_HEIGHT 1 + +static const unsigned microblock_table[5][3][2] = { + /*linear tiled square-tiled */ + {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ + {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +}; + static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) { struct r300_texture_state* state = &tex->state; @@ -92,33 +104,67 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } /** + * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile + * of the given texture. + */ +static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim) +{ + unsigned pixsize, tile_size; + + pixsize = util_format_get_blocksize(tex->tex.format); + tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] * + (tex->macrotile == R300_BUFFER_TILED ? 8 : 1); + + assert(tile_size); + return tile_size; +} + +/** * Return the stride, in bytes, of the texture images of the given texture * at the given level. */ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) { + unsigned tile_width, width; + if (tex->stride_override) return tex->stride_override; + /* Check the level. */ if (level > tex->tex.last_level) { debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level); return 0; } - return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); + tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH); + width = align(u_minify(tex->tex.width0, level), tile_width); + + /* Should already be aligned except for S3TC. */ + return align(util_format_get_stride(tex->tex.format, width), 32); +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, + unsigned level) +{ + unsigned height, tile_height; + + tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT); + height = align(u_minify(tex->tex.height0, level), tile_height); + + return util_format_get_nblocksy(tex->tex.format, height); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, layer_size; - int i; + unsigned stride, size, layer_size, nblocksy, i; - for (i = 0; i <= base->last_level; i++) { - unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); + debug_printf("r300: Making miptree for texture, format %s\n", pf_name(base->format)); + for (i = 0; i <= base->last_level; i++) { stride = r300_texture_get_stride(tex, i); + nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) @@ -132,9 +178,9 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->pitch[i] = stride / util_format_get_blocksize(base->format); debug_printf("r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes)\n", + "(%dx%dx%d px, pitch %d bytes) %d bytes total\n", i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride); + u_minify(base->depth0, i), stride, tex->size); } } @@ -163,7 +209,7 @@ static struct pipe_texture* r300_setup_miptree(tex); r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); - tex->buffer = screen->buffer_create(screen, 1024, + tex->buffer = screen->buffer_create(screen, 2048, PIPE_BUFFER_USAGE_PIXEL, tex->size); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 096cdb20bbe..a792c2cf989 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -120,7 +120,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */ /* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */ /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */ - /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */ + /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */ /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */ /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */ /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */ diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index c4ed0d712f9..68aef70872e 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_vs.h" +#include "r300_fs.h" #include "r300_context.h" #include "r300_screen.h" @@ -33,6 +34,8 @@ #include "radeon_compiler.h" +#include "util/u_math.h" + /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( struct tgsi_shader_info* info, @@ -88,11 +91,13 @@ static void r300_shader_read_vs_outputs( } } -static void r300_shader_vap_output_fmt( - struct r300_shader_semantics* vs_outputs, - uint* hwfmt) +static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) { + struct r300_shader_semantics* vs_outputs = &vs->outputs; + uint32_t* hwfmt = vs->hwfmt; int i, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Do the actual vertex_info setup. * @@ -119,13 +124,19 @@ static void r300_shader_vap_output_fmt( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { hwfmt[1] |= R300_INPUT_CNTL_COLOR; hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + hwfmt[1] |= R300_INPUT_CNTL_COLOR; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + } + } /* Texture coordinates. */ gen_count = 0; @@ -146,6 +157,9 @@ static void r300_shader_vap_output_fmt( /* XXX magic */ assert(gen_count <= 8); + + /* WPOS. */ + vs->wpos_tex_output = gen_count; } /* Sets up stream mapping to equivalent VS outputs if TCL is bypassed @@ -155,6 +169,8 @@ static void r300_stream_locations_notcl( int* stream_loc) { int i, tabi = 0, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Position. */ stream_loc[tabi++] = 0; @@ -166,14 +182,14 @@ static void r300_stream_locations_notcl( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { stream_loc[tabi++] = 2 + i; } } /* Back-face colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { stream_loc[tabi++] = 4 + i; } } @@ -181,7 +197,7 @@ static void r300_stream_locations_notcl( /* Texture coordinates. */ gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { assert(tabi < 16); stream_loc[tabi++] = 6 + gen_count; gen_count++; @@ -195,8 +211,12 @@ static void r300_stream_locations_notcl( gen_count++; } - /* XXX magic */ - assert(gen_count <= 8); + /* WPOS. */ + if (vs_outputs->wpos != ATTR_UNUSED) { + assert(tabi < 16); + stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } for (; tabi < 16;) { stream_loc[tabi++] = -1; @@ -209,6 +229,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) struct r300_shader_semantics* outputs = &vs->outputs; struct tgsi_shader_info* info = &vs->info; int i, reg = 0; + boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED || + outputs->bcolor[1] != ATTR_UNUSED; /* Fill in the input mapping */ for (i = 0; i < info->num_inputs; i++) @@ -226,14 +248,30 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) c->code->outputs[outputs->psize] = reg++; } + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (outputs->color[i] != ATTR_UNUSED) { c->code->outputs[outputs->color[i]] = reg++; + } else if (any_bcolor_used) { + reg++; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (outputs->bcolor[i] != ATTR_UNUSED) { + c->code->outputs[outputs->bcolor[i]] = reg++; + } else if (any_bcolor_used) { + reg++; + } + } /* Texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT; i++) { @@ -246,6 +284,33 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) if (outputs->fog != ATTR_UNUSED) { c->code->outputs[outputs->fog] = reg++; } + + /* WPOS. */ + if (outputs->wpos != ATTR_UNUSED) { + c->code->outputs[outputs->wpos] = reg++; + } +} + +static void r300_insert_wpos(struct r300_vertex_program_compiler* c, + struct r300_shader_semantics* outputs) +{ + int i, lastOutput = 0; + + /* Find the max output index. */ + lastOutput = MAX2(lastOutput, outputs->psize); + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->color[i]); + lastOutput = MAX2(lastOutput, outputs->bcolor[i]); + } + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->generic[i]); + } + lastOutput = MAX2(lastOutput, outputs->fog); + + /* Set WPOS after the last output. */ + lastOutput++; + rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */ + outputs->wpos = lastOutput; } void r300_translate_vertex_shader(struct r300_context* r300, @@ -256,8 +321,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Initialize. */ r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); - r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); /* Setup the compiler */ rc_init(&compiler.Base); @@ -277,9 +340,15 @@ void r300_translate_vertex_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); - compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs); + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; + /* Insert the WPOS output. */ + r300_insert_wpos(&compiler, &vs->outputs); + + r300_shader_vap_output_fmt(vs); + r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); + /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { @@ -292,3 +361,30 @@ void r300_translate_vertex_shader(struct r300_context* r300, rc_destroy(&compiler.Base); vs->translated = TRUE; } + +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) +{ + struct r300_vertex_shader* vs = r300->vs; + int tex_output = r300->vs->wpos_tex_output; + uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; + uint32_t* hwfmt = vs->hwfmt; + + if (r300->fs->inputs.wpos != ATTR_UNUSED) { + /* Enable WPOS in VAP. */ + if (!(hwfmt[1] & tex_fmt)) { + hwfmt[1] |= tex_fmt; + hwfmt[3] |= (4 << (3 * tex_output)); + + assert(tex_output < 8); + return TRUE; + } + } else { + /* Disable WPOS in VAP. */ + if (hwfmt[1] & tex_fmt) { + hwfmt[1] &= ~tex_fmt; + hwfmt[3] &= ~(4 << (3 * tex_output)); + return TRUE; + } + } + return FALSE; +} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 67e9db5366f..18cfeee3cd4 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -43,6 +43,9 @@ struct r300_vertex_shader { /* Stream locations for SWTCL or if TCL is bypassed. */ int stream_loc_notcl[16]; + /* Output stream location for WPOS. */ + int wpos_tex_output; + /* Has this shader been translated yet? */ boolean translated; @@ -53,4 +56,7 @@ struct r300_vertex_shader { void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); +/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */ +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300); + #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index f98087deb8c..5f130453c39 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,6 +36,7 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_tile_cache.h" @@ -55,6 +56,9 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, if (softpipe->no_rast) return; + if (!softpipe_check_render_cond(softpipe)) + return; + #if 0 softpipe_update_derived(softpipe); /* not needed?? */ #endif diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 0862e9f24bd..8e017939402 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -176,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, } +static void +softpipe_render_condition( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + + softpipe->render_cond_query = query; + softpipe->render_cond_mode = mode; +} + + + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -191,6 +204,7 @@ softpipe_create( struct pipe_screen *screen ) #endif softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE ); softpipe->pipe.winsys = screen->winsys; softpipe->pipe.screen = screen; @@ -222,6 +236,10 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + softpipe->pipe.create_gs_state = softpipe_create_gs_state; + softpipe->pipe.bind_gs_state = softpipe_bind_gs_state; + softpipe->pipe.delete_gs_state = softpipe_delete_gs_state; + softpipe->pipe.set_blend_color = softpipe_set_blend_color; softpipe->pipe.set_clip_state = softpipe_set_clip_state; softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; @@ -238,6 +256,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; + softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; + softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; @@ -247,6 +267,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe_init_query_funcs( softpipe ); + softpipe->pipe.render_condition = softpipe_render_condition; + /* * Alloc caches for accessing drawing surfaces and textures. * Must be before quad stage setup! diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index ac24fccb4c1..da673c57ada 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -58,6 +58,7 @@ struct softpipe_context { struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; struct sp_vertex_shader *vs; + struct sp_geometry_shader *gs; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -115,6 +116,10 @@ struct softpipe_context { unsigned line_stipple_counter; + /** Conditional query object and mode */ + struct pipe_query *render_cond_query; + uint render_cond_mode; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; @@ -147,6 +152,7 @@ struct softpipe_context { unsigned use_sse : 1; unsigned dump_fs : 1; + unsigned dump_gs : 1; unsigned no_rast : 1; }; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 96e1c5d8159..03b58d2fb72 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,6 +38,7 @@ #include "util/u_prim.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_state.h" #include "draw/draw_context.h" @@ -48,7 +49,7 @@ static void softpipe_map_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; - uint i, size; + uint i, vssize, gssize; for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i] && sp->constants[i]->size) @@ -57,13 +58,21 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) } if (sp->constants[PIPE_SHADER_VERTEX]) - size = sp->constants[PIPE_SHADER_VERTEX]->size; + vssize = sp->constants[PIPE_SHADER_VERTEX]->size; else - size = 0; + vssize = 0; - draw_set_mapped_constant_buffer(sp->draw, + if (sp->constants[PIPE_SHADER_GEOMETRY]) + gssize = sp->constants[PIPE_SHADER_GEOMETRY]->size; + else + gssize = 0; + + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], - size); + vssize); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, + sp->mapped_constants[PIPE_SHADER_GEOMETRY], + gssize); } @@ -78,9 +87,10 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) */ draw_flush(sp->draw); - draw_set_mapped_constant_buffer(sp->draw, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0); - for (i = 0; i < 2; i++) { + for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i] && sp->constants[i]->size) ws->buffer_unmap(ws, sp->constants[i]); sp->mapped_constants[i] = NULL; @@ -88,20 +98,42 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) } -boolean +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + */ +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - */ -boolean +void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -109,51 +141,142 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + min_index, + max_index, + mode, + start, + count, + 0, + 1); +} + + +void +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); +} + +void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; unsigned i; + if (!softpipe_check_render_cond(sp)) + return; + sp->reduced_api_prim = u_reduced_prim(mode); - if (sp->dirty) - softpipe_update_derived( sp ); + if (sp->dirty) { + softpipe_update_derived(sp); + } softpipe_map_transfers(sp); softpipe_map_constant_buffers(sp); - /* - * Map vertex buffers - */ + /* Map vertex buffers */ for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf - = pipe_buffer_map(pipe->screen, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + void *buf; + + buf = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(draw, indexSize, - min_index, - max_index, + void *mapped_indexes; + + mapped_indexes = pipe_buffer_map(pipe->screen, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, + indexSize, + minIndex, + maxIndex, mapped_indexes); - } - else { + } else { /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, start, - start + count - 1, NULL); + draw_set_mapped_element_buffer_range(draw, + 0, + start, + start + count - 1, + NULL); } /* draw! */ - draw_arrays(draw, mode, start, count); + draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); - /* - * unmap vertex/index buffers - will cause draw module to flush - */ + /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); @@ -163,24 +286,8 @@ softpipe_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); } - /* Note: leave drawing surfaces mapped */ softpipe_unmap_constant_buffers(sp); sp->dirty_render_cache = TRUE; - - return TRUE; -} - - -boolean -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - return softpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); } diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 5fbac06a535..7f573aef3c3 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -128,6 +128,7 @@ sp_vbuf_unmap_vertices(struct vbuf_render *vbr, { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + (void) cvbr; /* do nothing */ } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index fe6b6cec353..d9babe81dad 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -229,7 +229,7 @@ blend_quad(struct quad_stage *qs, static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; struct softpipe_context *softpipe = qs->softpipe; - float source[4][QUAD_SIZE]; + float source[4][QUAD_SIZE] = { { 0 } }; /* * Compute src/first term RGB diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 379cf4ad064..4ef5d9f7b1d 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe, } +/** + * Called by rendering function to check rendering is conditional. + * \return TRUE if we should render, FALSE if we should skip rendering + */ +boolean +softpipe_check_render_cond(struct softpipe_context *sp) +{ + struct pipe_context *pipe = &sp->pipe; + boolean b, wait; + uint64_t result; + + if (!sp->render_cond_query) { + return TRUE; /* no query predicate, draw normally */ + } + + wait = (sp->render_cond_mode == PIPE_RENDER_COND_WAIT || + sp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT); + + b = pipe->get_query_result(pipe, sp->render_cond_query, wait, &result); + if (b) + return result > 0; + else + return TRUE; +} + + void softpipe_init_query_funcs(struct softpipe_context *softpipe ) { softpipe->pipe.create_query = softpipe_create_query; diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h index 05060a45759..736c033897e 100644 --- a/src/gallium/drivers/softpipe/sp_query.h +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -32,6 +32,10 @@ #ifndef SP_QUERY_H #define SP_QUERY_H +extern boolean +softpipe_check_render_cond(struct softpipe_context *sp); + + struct softpipe_context; extern void softpipe_init_query_funcs(struct softpipe_context * ); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 615581b95f9..3da75364c5d 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1268,7 +1268,7 @@ void sp_setup_prepare( struct setup_context *setup ) } /* Note: nr_attrs is only used for debugging (vertex printing) */ - setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw); + setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw); sp->quad.first->begin( sp->quad.first ); diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index c41e7184880..7f244c4fd49 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -50,6 +50,7 @@ #define SP_NEW_VERTEX 0x1000 #define SP_NEW_VS 0x2000 #define SP_NEW_QUERY 0x4000 +#define SP_NEW_GS 0x8000 struct tgsi_sampler; @@ -90,6 +91,11 @@ struct sp_vertex_shader { int max_sampler; /* -1 if no samplers */ }; +/** Subclass of pipe_shader_state */ +struct sp_geometry_shader { + struct pipe_shader_state shader; + struct draw_geometry_shader *draw_data; +}; void * @@ -143,6 +149,10 @@ void *softpipe_create_vs_state(struct pipe_context *, const struct pipe_shader_state *); void softpipe_bind_vs_state(struct pipe_context *, void *); void softpipe_delete_vs_state(struct pipe_context *, void *); +void *softpipe_create_gs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_gs_state(struct pipe_context *, void *); +void softpipe_delete_gs_state(struct pipe_context *, void *); void softpipe_set_polygon_stipple( struct pipe_context *, const struct pipe_poly_stipple * ); @@ -174,14 +184,14 @@ void softpipe_set_vertex_buffers(struct pipe_context *, void softpipe_update_derived( struct softpipe_context *softpipe ); -boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); -boolean softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); -boolean +void softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); +void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -190,6 +200,24 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void softpipe_map_transfers(struct softpipe_context *sp); void diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index efed082f823..95ab3234337 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -29,6 +29,7 @@ */ #include "util/u_memory.h" +#include "draw/draw_context.h" #include "sp_context.h" #include "sp_state.h" @@ -45,6 +46,8 @@ void softpipe_bind_blend_state( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend = (struct pipe_blend_state *)blend; softpipe->dirty |= SP_NEW_BLEND; @@ -62,6 +65,8 @@ void softpipe_set_blend_color( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend_color = *blend_color; softpipe->dirty |= SP_NEW_BLEND; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index c24a737d07b..f6856a5f691 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -67,7 +67,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(softpipe->draw); + const uint num = draw_current_shader_outputs(softpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -117,13 +117,13 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(softpipe->draw, - spfs->info.input_semantic_name[i], - spfs->info.input_semantic_index[i]); + src = draw_find_shader_output(softpipe->draw, + spfs->info.input_semantic_name[i], + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + softpipe->psize_slot = draw_find_shader_output(softpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index eba0563c62c..b7ed4441b43 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -69,7 +69,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->fs = (struct sp_fragment_shader *) fs; + draw_flush(softpipe->draw); + + if (softpipe->fs == fs) + return; + + draw_flush(softpipe->draw); + + softpipe->fs = fs; softpipe->dirty |= SP_NEW_FS; } @@ -159,8 +166,74 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + draw_flush(softpipe->draw); + /* note: reference counting */ pipe_buffer_reference(&softpipe->constants[shader], buf); softpipe->dirty |= SP_NEW_CONSTANTS; } + +void * +softpipe_create_gs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_geometry_shader *state; + + state = CALLOC_STRUCT(sp_geometry_shader); + if (state == NULL ) + goto fail; + + /* debug */ + if (softpipe->dump_gs) + tgsi_dump(templ->tokens, 0); + + /* copy shader tokens, the ones passed in will go away. + */ + state->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (state->shader.tokens == NULL) + goto fail; + + state->draw_data = draw_create_geometry_shader(softpipe->draw, templ); + if (state->draw_data == NULL) + goto fail; + + return state; + +fail: + if (state) { + FREE( (void *)state->shader.tokens ); + FREE( state->draw_data ); + FREE( state ); + } + return NULL; +} + + +void +softpipe_bind_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->gs = (struct sp_geometry_shader *)gs; + + draw_bind_geometry_shader(softpipe->draw, + (softpipe->gs ? softpipe->gs->draw_data : NULL)); + + softpipe->dirty |= SP_NEW_GS; +} + + +void +softpipe_delete_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + struct sp_geometry_shader *state = + (struct sp_geometry_shader *)gs; + + draw_delete_geometry_shader(softpipe->draw, + (state) ? state->draw_data : 0); + FREE(state); +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 87b72196838..a5b00336d44 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -41,14 +41,17 @@ softpipe_create_rasterizer_state(struct pipe_context *pipe, } void softpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct softpipe_context *softpipe = softpipe_context(pipe); + if (softpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(softpipe->draw, setup); + draw_set_rasterizer_state(softpipe->draw, rasterizer); - softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + softpipe->rasterizer = rasterizer; softpipe->dirty |= SP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index a518248bb18..f6154109ea8 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -51,6 +51,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, struct softpipe_context *sp = softpipe_context(pipe); uint i; + draw_flush(sp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index e26153b1d90..1ae8fecacf7 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2,7 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. + * Copyright 2008-2010 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -514,21 +514,15 @@ static float compute_lambda_1d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float rho = MAX2(dsdx, dsdy) * texture->width0; - float lambda; - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - return lambda; + return util_fast_log2(rho); } @@ -536,8 +530,7 @@ static float compute_lambda_2d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; @@ -548,13 +541,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, float maxx = MAX2(dsdx, dsdy) * texture->width0; float maxy = MAX2(dtdx, dtdy) * texture->height0; float rho = MAX2(maxx, maxy); - float lambda; - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; + return util_fast_log2(rho); } @@ -562,8 +550,7 @@ static float compute_lambda_3d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; @@ -576,31 +563,26 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, float maxx = MAX2(dsdx, dsdy) * texture->width0; float maxy = MAX2(dtdx, dtdy) * texture->height0; float maxz = MAX2(dpdx, dpdy) * texture->depth0; - float rho, lambda; + float rho; rho = MAX2(maxx, maxy); rho = MAX2(rho, maxz); - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; + return util_fast_log2(rho); } /** * Compute lambda for a vertex texture sampler. - * Since there aren't derivatives to use, just return the LOD bias. + * Since there aren't derivatives to use, just return 0. */ static float compute_lambda_vert(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { - return lodbias; + return 0.0f; } @@ -769,7 +751,8 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -827,7 +810,8 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -866,7 +850,8 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -914,7 +899,8 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -949,7 +935,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -996,7 +983,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1035,7 +1023,8 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1076,7 +1065,8 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1115,7 +1105,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1161,7 +1152,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1209,7 +1201,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1261,29 +1254,60 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, } +/* Calculate level of detail for every fragment. + * Note that lambda has already been biased by global LOD bias. + */ +static INLINE void +compute_lod(const struct pipe_sampler_state *sampler, + const float biased_lambda, + const float lodbias[QUAD_SIZE], + float lod[QUAD_SIZE]) +{ + uint i; + + for (i = 0; i < QUAD_SIZE; i++) { + lod[i] = biased_lambda + lodbias[i]; + lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod); + } +} + + static void mip_filter_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; int level0; float lambda; + float lod[QUAD_SIZE]; + + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); - lambda = samp->compute_lambda(samp, s, t, p, lodbias); + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; level0 = (int)lambda; if (lambda < 0.0) { samp->level = 0; - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else if (level0 >= texture->last_level) { samp->level = texture->last_level; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { float levelBlend = lambda - level0; @@ -1292,10 +1316,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, int c,j; samp->level = level0; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0); samp->level = level0+1; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1); for (j = 0; j < QUAD_SIZE; j++) { for (c = 0; c < 4; c++) { @@ -1311,23 +1335,36 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; float lambda; + float lod[QUAD_SIZE]; - lambda = samp->compute_lambda(samp, s, t, p, lodbias); + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; if (lambda < 0.0) { samp->level = 0; - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { samp->level = (int)(lambda + 0.5) ; samp->level = MIN2(samp->level, (int)texture->last_level); - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } #if 0 @@ -1345,17 +1382,32 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - float lambda = samp->compute_lambda(samp, s, t, p, lodbias); + float lambda; + float lod[QUAD_SIZE]; + + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; if (lambda < 0.0) { - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } } @@ -1371,15 +1423,28 @@ mip_filter_linear_2d_linear_repeat_POT( const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; int level0; float lambda; + float lod[QUAD_SIZE]; - lambda = compute_lambda_2d(samp, s, t, p, lodbias); + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; level0 = (int)lambda; /* Catches both negative and large values of level0: @@ -1390,7 +1455,7 @@ mip_filter_linear_2d_linear_repeat_POT( else samp->level = texture->last_level; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { float levelBlend = lambda - level0; @@ -1399,10 +1464,10 @@ mip_filter_linear_2d_linear_repeat_POT( int c,j; samp->level = level0; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0); samp->level = level0+1; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1); for (j = 0; j < QUAD_SIZE; j++) { for (c = 0; c < 4; c++) { @@ -1422,7 +1487,8 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1430,7 +1496,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, int j, k0, k1, k2, k3; float val; - samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba ); + samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba); /** * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' @@ -1508,7 +1574,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1589,7 +1656,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, * is not active, this will point somewhere deeper into the * pipeline, eg. to mip_filter or even img_filter. */ - samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba); + samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba); } @@ -1862,7 +1929,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, break; } - if (sampler->compare_mode != FALSE) { + if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { samp->compare = sample_compare; } else { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index b0797711d37..b6e66c998ae 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -2,6 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2010 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -46,14 +47,14 @@ typedef void (*wrap_linear_func)(const float s[4], typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias); + const float p[QUAD_SIZE]); typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index c3de12b4a39..af99c9de37c 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -29,6 +29,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_screen.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "util/u_upload_mgr.h" #include "svga_context.h" @@ -61,6 +62,9 @@ static void svga_destroy( struct pipe_context *pipe ) u_upload_destroy( svga->upload_vb ); u_upload_destroy( svga->upload_ib ); + util_bitmask_destroy( svga->vs_bm ); + util_bitmask_destroy( svga->fs_bm ); + for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader) pipe_buffer_reference( &svga->curr.cb[shader], NULL ); @@ -130,7 +134,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga = CALLOC_STRUCT(svga_context); if (svga == NULL) - goto error1; + goto no_svga; svga->pipe.winsys = screen->winsys; svga->pipe.screen = screen; @@ -142,7 +146,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->swc = svgascreen->sws->context_create(svgascreen->sws); if(!svga->swc) - goto error2; + goto no_swc; svga_init_blend_functions(svga); svga_init_blit_functions(svga); @@ -165,32 +169,40 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); if (!svga_init_swtnl(svga)) - goto error3; + goto no_swtnl; + + svga->fs_bm = util_bitmask_create(); + if (svga->fs_bm == NULL) + goto no_fs_bm; + + svga->vs_bm = util_bitmask_create(); + if (svga->vs_bm == NULL) + goto no_vs_bm; svga->upload_ib = u_upload_create( svga->pipe.screen, 32 * 1024, 16, PIPE_BUFFER_USAGE_INDEX ); if (svga->upload_ib == NULL) - goto error4; + goto no_upload_ib; svga->upload_vb = u_upload_create( svga->pipe.screen, 128 * 1024, 16, PIPE_BUFFER_USAGE_VERTEX ); if (svga->upload_vb == NULL) - goto error5; + goto no_upload_vb; svga->hwtnl = svga_hwtnl_create( svga, svga->upload_ib, svga->swc ); if (svga->hwtnl == NULL) - goto error6; + goto no_hwtnl; ret = svga_emit_initial_state( svga ); if (ret) - goto error7; + goto no_state; /* Avoid shortcircuiting state with initial value of zero. */ @@ -209,19 +221,23 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) return &svga->pipe; -error7: +no_state: svga_hwtnl_destroy( svga->hwtnl ); -error6: +no_hwtnl: u_upload_destroy( svga->upload_vb ); -error5: +no_upload_vb: u_upload_destroy( svga->upload_ib ); -error4: +no_upload_ib: + util_bitmask_destroy( svga->vs_bm ); +no_vs_bm: + util_bitmask_destroy( svga->fs_bm ); +no_fs_bm: svga_destroy_swtnl(svga); -error3: +no_swtnl: svga->swc->destroy(svga->swc); -error2: +no_swc: FREE(svga); -error1: +no_svga: return NULL; } diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 0885d9ca741..66259fd0103 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -41,6 +41,7 @@ struct draw_vertex_shader; struct svga_shader_result; struct SVGACmdMemory; +struct util_bitmask; struct u_upload_mgr; @@ -265,8 +266,6 @@ struct svga_hw_draw_state unsigned ts[16][TS_MAX]; float cb[PIPE_SHADER_TYPES][CB_MAX][4]; - unsigned shader_id[PIPE_SHADER_TYPES]; - struct svga_shader_result *fs; struct svga_shader_result *vs; struct svga_hw_view_state views[PIPE_MAX_SAMPLERS]; @@ -319,12 +318,14 @@ struct svga_context boolean new_vdecl; } swtnl; + /* Bitmask of used shader IDs */ + struct util_bitmask *fs_bm; + struct util_bitmask *vs_bm; + struct { unsigned dirty[4]; unsigned texture_timestamp; - unsigned next_fs_id; - unsigned next_vs_id; /* Internally generated shaders: */ diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 8db40d0fd57..ca73cf9d5a3 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -164,7 +164,8 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", - svga_surface(svga->curr.framebuffer.cbufs[0])->handle, + svga->curr.framebuffer.cbufs[0] ? + svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, hwtnl->cmd.prim_count); ret = SVGA3D_BeginDrawPrimitives(swc, diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 71a552862e9..0f24ef4ee8d 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -149,7 +149,7 @@ retry: -static boolean +static void svga_draw_range_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, @@ -162,7 +162,7 @@ svga_draw_range_elements( struct pipe_context *pipe, enum pipe_error ret = 0; if (!u_trim_pipe_prim( prim, &count )) - return TRUE; + return; /* * Mark currently bound target surfaces as dirty @@ -183,7 +183,7 @@ svga_draw_range_elements( struct pipe_context *pipe, #ifdef DEBUG if (svga->curr.vs->base.id == svga->debug.disable_shader || svga->curr.fs->base.id == svga->debug.disable_shader) - return 0; + return; #endif if (svga->state.sw.need_swtnl) @@ -225,31 +225,29 @@ svga_draw_range_elements( struct pipe_context *pipe, svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); } - - return ret == PIPE_OK; } -static boolean +static void svga_draw_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements( pipe, index_buffer, - index_size, - 0, 0xffffffff, - prim, start, count ); + svga_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + prim, start, count ); } -static boolean +static void svga_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements(pipe, NULL, 0, - start, start + count - 1, - prim, - start, count); + svga_draw_range_elements(pipe, NULL, 0, + start, start + count - 1, + prim, + start, count); } diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index e3be840d920..5f1213e46a3 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -107,7 +108,16 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->fs_bm, result->id ); + svga_destroy_shader_result( result ); + + /* + * Remove stale references to this result to ensure a new result on the + * same address will be detected as a change. + */ + if(result == svga->state.hw_draw.fs) + svga->state.hw_draw.fs = NULL; } FREE((void *)fs->base.tokens); diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 78053e755e2..460a101f8c0 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -76,7 +76,6 @@ static INLINE unsigned translate_img_filter( unsigned filter ) switch (filter) { case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: return SVGA3D_TEX_FILTER_ANISOTROPIC; default: assert(0); return SVGA3D_TEX_FILTER_NEAREST; @@ -107,6 +106,8 @@ svga_create_sampler_state(struct pipe_context *pipe, cso->magfilter = translate_img_filter( sampler->mag_img_filter ); cso->minfilter = translate_img_filter( sampler->min_img_filter ); cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 ); + if(cso->aniso_level != 1) + cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC; cso->lod_bias = sampler->lod_bias; cso->addressu = translate_wrap_mode(sampler->wrap_s); cso->addressv = translate_wrap_mode(sampler->wrap_t); diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index c104c41f5f8..7e6ab576add 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -172,7 +173,16 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->vs_bm, result->id ); + svga_destroy_shader_result( result ); + + /* + * Remove stale references to this result to ensure a new result on the + * same address will be detected as a change. + */ + if(result == svga->state.hw_draw.vs) + svga->state.hw_draw.vs = NULL; } FREE((void *)vs->base.tokens); diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 6ec38ed3e45..ec2886348b9 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "svga_context.h" #include "svga_state.h" @@ -39,8 +40,13 @@ static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a, const struct svga_fs_compile_key *b ) { - unsigned keysize = svga_fs_key_size( a ); - return memcmp( a, b, keysize ); + unsigned keysize_a = svga_fs_key_size( a ); + unsigned keysize_b = svga_fs_key_size( b ); + + if (keysize_a != keysize_b) { + return (int)(keysize_a - keysize_b); + } + return memcmp( a, b, keysize_a ); } @@ -66,7 +72,7 @@ static enum pipe_error compile_fs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_fragment_program( fs, key ); if (result == NULL) { @@ -74,9 +80,14 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->fs_bm); + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + result->id, SVGA3D_SHADERTYPE_PS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -84,14 +95,16 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_fs_id++; result->next = fs->base.results; fs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->fs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -116,7 +129,7 @@ fail: */ static int emit_white_fs( struct svga_context *svga ) { - int ret; + int ret = PIPE_ERROR; /* ps_3_0 * def c0, 1.000000, 0.000000, 0.000000, 1.000000 @@ -137,16 +150,26 @@ static int emit_white_fs( struct svga_context *svga ) 0x0000ffff, }; + assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX); + svga->state.white_fs_id = util_bitmask_add(svga->fs_bm); + if(svga->state.white_fs_id == SVGA3D_INVALID_ID) + goto no_fs_id; + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + svga->state.white_fs_id, SVGA3D_SHADERTYPE_PS, white_tokens, sizeof(white_tokens)); if (ret) - return ret; + goto no_definition; - svga->state.white_fs_id = svga->state.next_fs_id++; return 0; + +no_definition: + util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id); + svga->state.white_fs_id = SVGA3D_INVALID_ID; +no_fs_id: + return ret; } @@ -251,15 +274,14 @@ static int emit_hw_fs( struct svga_context *svga, assert(id != SVGA3D_INVALID_ID); - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_PS, + if (result != svga->state.hw_draw.fs) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_PS, id ); if (ret) return ret; svga->dirty |= SVGA_NEW_FS_RESULT; - svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id; svga->state.hw_draw.fs = result; } diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 44b7ceb4fa4..e7e6c084321 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_defines.h" #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "translate/translate.h" #include "svga_context.h" @@ -70,7 +71,7 @@ static enum pipe_error compile_vs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret = PIPE_OK; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_vertex_program( vs, key ); if (result == NULL) { @@ -78,8 +79,14 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->vs_bm); + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_vs_id, + result->id, SVGA3D_SHADERTYPE_VS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -87,14 +94,16 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_vs_id++; result->next = vs->base.results; vs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->vs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -142,15 +151,14 @@ static int emit_hw_vs( struct svga_context *svga, id = result->id; } - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_VS, + if (result != svga->state.hw_draw.vs) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_VS, id ); if (ret) return ret; svga->dirty |= SVGA_NEW_VS_RESULT; - svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id; svga->state.hw_draw.vs = result; } @@ -194,10 +202,12 @@ static int update_zero_stride( struct svga_context *svga, key.output_stride = 4 * sizeof(float); key.nr_elements = 1; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].input_format = vel->src_format; key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; key.element[0].input_offset = vel->src_offset; + key.element[0].instance_divisor = vel->instance_divisor; key.element[0].output_offset = const_idx * 4 * sizeof(float); translate_key_sanitize(&key); @@ -216,7 +226,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, vbuffer->stride); - translate->run(translate, 0, 1, + translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); pipe_buffer_unmap(svga->pipe.screen, diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 8b14c913f72..7655121bec1 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -90,7 +90,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga, PIPE_BUFFER_USAGE_CPU_READ); assert(map); draw_set_mapped_constant_buffer( - draw, + draw, PIPE_SHADER_VERTEX, map, svga->curr.cb[PIPE_SHADER_VERTEX]->size); } diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index 25b8c2af3a0..94b6ccc62dd 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -156,7 +156,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) memset(vdecl, 0, sizeof(vdecl)); /* always add position */ - src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->attrib[0].emit = EMIT_4F; vdecl[0].array.offset = offset; @@ -169,7 +169,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) for (i = 0; i < fs->base.info.num_inputs; i++) { unsigned name = fs->base.info.input_semantic_name[i]; unsigned index = fs->base.info.input_semantic_index[i]; - src = draw_find_vs_output(draw, name, index); + src = draw_find_shader_output(draw, name, index); vdecl[nr_decls].array.offset = offset; vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i]; diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index b8ef137c015..0cd620189b7 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -31,6 +31,7 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "svgadump/svga_shader_dump.h" @@ -221,6 +222,7 @@ svga_tgsi_translate( const struct svga_shader *shader, result->tokens = (const unsigned *)emit.buf; result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned); memcpy(&result->key, &key, sizeof key); + result->id = UTIL_BITMASK_INVALID_INDEX; if (SVGA_DEBUG & DEBUG_TGSI) { diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index 896c90a89ae..737a2213af5 100644 --- a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -39,26 +39,24 @@ struct tgsi_token; struct svga_vs_compile_key { - ubyte need_prescale:1; - ubyte allow_psiz:1; unsigned zero_stride_vertex_elements; - ubyte num_zero_stride_vertex_elements:6; + unsigned need_prescale:1; + unsigned allow_psiz:1; + unsigned num_zero_stride_vertex_elements:6; }; struct svga_fs_compile_key { - boolean light_twoside:1; - boolean front_cw:1; - ubyte num_textures; - ubyte num_unnormalized_coords; + unsigned light_twoside:1; + unsigned front_cw:1; + unsigned num_textures:8; + unsigned num_unnormalized_coords:8; struct { - ubyte compare_mode : 1; - ubyte compare_func : 3; - ubyte unnormalized : 1; - - ubyte width_height_idx : 7; - - ubyte texture_target; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned unnormalized:1; + unsigned width_height_idx:7; + unsigned texture_target:8; } tex[PIPE_MAX_SAMPLERS]; }; @@ -121,8 +119,7 @@ static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key ) static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key ) { - return (const char *)&key->tex[key->num_textures].texture_target - - (const char *)key; + return (const char *)&key->tex[key->num_textures] - (const char *)key; } struct svga_shader_result * diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 1670da8bfa9..dc5eb8fc606 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -2109,7 +2109,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_I2F: case TGSI_OPCODE_NOT: case TGSI_OPCODE_SHL: - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: case TGSI_OPCODE_XOR: return FALSE; diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index e6d4a74e868..d59fb89a58c 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -1444,6 +1444,312 @@ dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd) void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; + + switch(cmd_id) { + case SVGA_3D_CMD_SURFACE_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); + { + const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; + dump_SVGA3dCmdDefineSurface(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dSize) <= next) { + dump_SVGA3dSize((const SVGA3dSize *)body); + body += sizeof(SVGA3dSize); + } + } + break; + case SVGA_3D_CMD_SURFACE_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); + { + const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; + dump_SVGA3dCmdDestroySurface(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_COPY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); + { + const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; + dump_SVGA3dCmdSurfaceCopy(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + } + break; + case SVGA_3D_CMD_SURFACE_STRETCHBLT: + _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); + { + const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; + dump_SVGA3dCmdSurfaceStretchBlt(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_DMA: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); + { + const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; + dump_SVGA3dCmdSurfaceDMA(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { + dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); + body += sizeof(SVGA3dCmdSurfaceDMASuffix); + } + } + break; + case SVGA_3D_CMD_CONTEXT_DEFINE: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); + { + const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; + dump_SVGA3dCmdDefineContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CONTEXT_DESTROY: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); + { + const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; + dump_SVGA3dCmdDestroyContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTRANSFORM: + _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); + { + const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; + dump_SVGA3dCmdSetTransform(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETZRANGE: + _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); + { + const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; + dump_SVGA3dCmdSetZRange(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETRENDERSTATE: + _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); + { + const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; + dump_SVGA3dCmdSetRenderState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRenderState) <= next) { + dump_SVGA3dRenderState((const SVGA3dRenderState *)body); + body += sizeof(SVGA3dRenderState); + } + } + break; + case SVGA_3D_CMD_SETRENDERTARGET: + _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); + { + const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; + dump_SVGA3dCmdSetRenderTarget(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTEXTURESTATE: + _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); + { + const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; + dump_SVGA3dCmdSetTextureState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dTextureState) <= next) { + dump_SVGA3dTextureState((const SVGA3dTextureState *)body); + body += sizeof(SVGA3dTextureState); + } + } + break; + case SVGA_3D_CMD_SETMATERIAL: + _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); + { + const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; + dump_SVGA3dCmdSetMaterial(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTDATA: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); + { + const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; + dump_SVGA3dCmdSetLightData(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTENABLED: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); + { + const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; + dump_SVGA3dCmdSetLightEnabled(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETVIEWPORT: + _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); + { + const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; + dump_SVGA3dCmdSetViewport(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETCLIPPLANE: + _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); + { + const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; + dump_SVGA3dCmdSetClipPlane(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CLEAR: + _debug_printf("\tSVGA_3D_CMD_CLEAR\n"); + { + const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; + dump_SVGA3dCmdClear(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRect) <= next) { + dump_SVGA3dRect((const SVGA3dRect *)body); + body += sizeof(SVGA3dRect); + } + } + break; + case SVGA_3D_CMD_PRESENT: + _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); + { + const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; + dump_SVGA3dCmdPresent(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyRect) <= next) { + dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); + body += sizeof(SVGA3dCopyRect); + } + } + break; + case SVGA_3D_CMD_SHADER_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); + { + const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; + dump_SVGA3dCmdDefineShader(cmd); + body = (const uint8_t *)&cmd[1]; + svga_shader_dump((const uint32_t *)body, + (unsigned)(next - body)/sizeof(uint32_t), + FALSE ); + body = next; + } + break; + case SVGA_3D_CMD_SHADER_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); + { + const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; + dump_SVGA3dCmdDestroyShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); + { + const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; + dump_SVGA3dCmdSetShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER_CONST: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); + { + const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; + dump_SVGA3dCmdSetShaderConst(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_DRAW_PRIMITIVES: + _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); + { + const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; + unsigned i, j; + dump_SVGA3dCmdDrawPrimitives(cmd); + body = (const uint8_t *)&cmd[1]; + for(i = 0; i < cmd->numVertexDecls; ++i) { + dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); + body += sizeof(SVGA3dVertexDecl); + } + for(j = 0; j < cmd->numRanges; ++j) { + dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); + body += sizeof(SVGA3dPrimitiveRange); + } + while(body + sizeof(SVGA3dVertexDivisor) <= next) { + dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); + body += sizeof(SVGA3dVertexDivisor); + } + } + break; + case SVGA_3D_CMD_SETSCISSORRECT: + _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); + { + const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; + dump_SVGA3dCmdSetScissorRect(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BEGIN_QUERY: + _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); + { + const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; + dump_SVGA3dCmdBeginQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_END_QUERY: + _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); + { + const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; + dump_SVGA3dCmdEndQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_WAIT_FOR_QUERY: + _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); + { + const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; + dump_SVGA3dCmdWaitForQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: + _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); + { + const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; + dump_SVGA3dCmdBlitSurfaceToScreen(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGASignedRect) <= next) { + dump_SVGASignedRect((const SVGASignedRect *)body); + body += sizeof(SVGASignedRect); + } + } + break; + default: + _debug_printf("\t0x%08x\n", cmd_id); + break; + } + + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); +} + + +void svga_dump_commands(const void *commands, uint32_t size) { const uint8_t *next = commands; @@ -1458,307 +1764,11 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; - switch(cmd_id) { - case SVGA_3D_CMD_SURFACE_DEFINE: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); - { - const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; - dump_SVGA3dCmdDefineSurface(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dSize) <= next) { - dump_SVGA3dSize((const SVGA3dSize *)body); - body += sizeof(SVGA3dSize); - } - } - break; - case SVGA_3D_CMD_SURFACE_DESTROY: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); - { - const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; - dump_SVGA3dCmdDestroySurface(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_COPY: - _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); - { - const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; - dump_SVGA3dCmdSurfaceCopy(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - } - break; - case SVGA_3D_CMD_SURFACE_STRETCHBLT: - _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); - { - const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; - dump_SVGA3dCmdSurfaceStretchBlt(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_DMA: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); - { - const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; - dump_SVGA3dCmdSurfaceDMA(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { - dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); - body += sizeof(SVGA3dCmdSurfaceDMASuffix); - } - } - break; - case SVGA_3D_CMD_CONTEXT_DEFINE: - _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); - { - const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; - dump_SVGA3dCmdDefineContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CONTEXT_DESTROY: - _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); - { - const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; - dump_SVGA3dCmdDestroyContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTRANSFORM: - _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); - { - const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; - dump_SVGA3dCmdSetTransform(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETZRANGE: - _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); - { - const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; - dump_SVGA3dCmdSetZRange(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETRENDERSTATE: - _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); - { - const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; - dump_SVGA3dCmdSetRenderState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRenderState) <= next) { - dump_SVGA3dRenderState((const SVGA3dRenderState *)body); - body += sizeof(SVGA3dRenderState); - } - } - break; - case SVGA_3D_CMD_SETRENDERTARGET: - _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); - { - const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; - dump_SVGA3dCmdSetRenderTarget(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTEXTURESTATE: - _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); - { - const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; - dump_SVGA3dCmdSetTextureState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dTextureState) <= next) { - dump_SVGA3dTextureState((const SVGA3dTextureState *)body); - body += sizeof(SVGA3dTextureState); - } - } - break; - case SVGA_3D_CMD_SETMATERIAL: - _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); - { - const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; - dump_SVGA3dCmdSetMaterial(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTDATA: - _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); - { - const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; - dump_SVGA3dCmdSetLightData(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTENABLED: - _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); - { - const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; - dump_SVGA3dCmdSetLightEnabled(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETVIEWPORT: - _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); - { - const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; - dump_SVGA3dCmdSetViewport(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETCLIPPLANE: - _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); - { - const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; - dump_SVGA3dCmdSetClipPlane(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CLEAR: - _debug_printf("\tSVGA_3D_CMD_CLEAR\n"); - { - const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; - dump_SVGA3dCmdClear(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRect) <= next) { - dump_SVGA3dRect((const SVGA3dRect *)body); - body += sizeof(SVGA3dRect); - } - } - break; - case SVGA_3D_CMD_PRESENT: - _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); - { - const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; - dump_SVGA3dCmdPresent(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyRect) <= next) { - dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); - body += sizeof(SVGA3dCopyRect); - } - } - break; - case SVGA_3D_CMD_SHADER_DEFINE: - _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); - { - const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; - dump_SVGA3dCmdDefineShader(cmd); - body = (const uint8_t *)&cmd[1]; - svga_shader_dump((const uint32_t *)body, - (unsigned)(next - body)/sizeof(uint32_t), - FALSE ); - body = next; - } - break; - case SVGA_3D_CMD_SHADER_DESTROY: - _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); - { - const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; - dump_SVGA3dCmdDestroyShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER: - _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); - { - const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; - dump_SVGA3dCmdSetShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER_CONST: - _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); - { - const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; - dump_SVGA3dCmdSetShaderConst(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_DRAW_PRIMITIVES: - _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); - { - const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; - unsigned i, j; - dump_SVGA3dCmdDrawPrimitives(cmd); - body = (const uint8_t *)&cmd[1]; - for(i = 0; i < cmd->numVertexDecls; ++i) { - dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); - body += sizeof(SVGA3dVertexDecl); - } - for(j = 0; j < cmd->numRanges; ++j) { - dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); - body += sizeof(SVGA3dPrimitiveRange); - } - while(body + sizeof(SVGA3dVertexDivisor) <= next) { - dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); - body += sizeof(SVGA3dVertexDivisor); - } - } - break; - case SVGA_3D_CMD_SETSCISSORRECT: - _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); - { - const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; - dump_SVGA3dCmdSetScissorRect(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_BEGIN_QUERY: - _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); - { - const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; - dump_SVGA3dCmdBeginQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_END_QUERY: - _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); - { - const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; - dump_SVGA3dCmdEndQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_WAIT_FOR_QUERY: - _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); - { - const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; - dump_SVGA3dCmdWaitForQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: - _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); - { - const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; - dump_SVGA3dCmdBlitSurfaceToScreen(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGASignedRect) <= next) { - dump_SVGASignedRect((const SVGASignedRect *)body); - body += sizeof(SVGASignedRect); - } - } - break; - default: - _debug_printf("\t0x%08x\n", cmd_id); - break; - } - - while(body + sizeof(uint32_t) <= next) { - _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - _debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { _debug_printf("\tSVGA_CMD_FENCE\n"); diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h index 69a87020875..ca0154361cc 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.h +++ b/src/gallium/drivers/svga/svgadump/svga_dump.h @@ -28,6 +28,9 @@ #include "pipe/p_compiler.h" +void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size); + void svga_dump_commands(const void *commands, uint32_t size); diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py index a1ada29ef84..0bc0b3ae317 100755 --- a/src/gallium/drivers/svga/svgadump/svga_dump.py +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -208,6 +208,56 @@ cmds = [ def dump_cmds(): print r''' void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; +''' + print ' switch(cmd_id) {' + indexes = 'ijklmn' + for id, header, body, footer in cmds: + print ' case %s:' % id + print ' _debug_printf("\\t%s\\n");' % id + print ' {' + print ' const %s *cmd = (const %s *)body;' % (header, header) + if len(body): + print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' + print ' dump_%s(cmd);' % header + print ' body = (const uint8_t *)&cmd[1];' + for i in range(len(body)): + struct, count = body[i] + idx = indexes[i] + print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) + print ' dump_%s((const %s *)body);' % (struct, struct) + print ' body += sizeof(%s);' % struct + print ' }' + if footer is not None: + print ' while(body + sizeof(%s) <= next) {' % footer + print ' dump_%s((const %s *)body);' % (footer, footer) + print ' body += sizeof(%s);' % footer + print ' }' + if id == 'SVGA_3D_CMD_SHADER_DEFINE': + print ' svga_shader_dump((const uint32_t *)body,' + print ' (unsigned)(next - body)/sizeof(uint32_t),' + print ' FALSE);' + print ' body = next;' + print ' }' + print ' break;' + print ' default:' + print ' _debug_printf("\\t0x%08x\\n", cmd_id);' + print ' break;' + print ' }' + print r''' + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); +} +''' + print r''' +void svga_dump_commands(const void *commands, uint32_t size) { const uint8_t *next = commands; @@ -222,51 +272,11 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; -''' - print ' switch(cmd_id) {' - indexes = 'ijklmn' - for id, header, body, footer in cmds: - print ' case %s:' % id - print ' _debug_printf("\\t%s\\n");' % id - print ' {' - print ' const %s *cmd = (const %s *)body;' % (header, header) - if len(body): - print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' - print ' dump_%s(cmd);' % header - print ' body = (const uint8_t *)&cmd[1];' - for i in range(len(body)): - struct, count = body[i] - idx = indexes[i] - print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) - print ' dump_%s((const %s *)body);' % (struct, struct) - print ' body += sizeof(%s);' % struct - print ' }' - if footer is not None: - print ' while(body + sizeof(%s) <= next) {' % footer - print ' dump_%s((const %s *)body);' % (footer, footer) - print ' body += sizeof(%s);' % footer - print ' }' - if id == 'SVGA_3D_CMD_SHADER_DEFINE': - print ' sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));' - print ' body = next;' - print ' }' - print ' break;' - print ' default:' - print ' _debug_printf("\\t0x%08x\\n", cmd_id);' - print ' break;' - print ' }' - - print r''' - while(body + sizeof(uint32_t) <= next) { - _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - _debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { _debug_printf("\tSVGA_CMD_FENCE\n"); diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 1000c31e49a..203c3851bc3 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -24,11 +24,10 @@ ensure the right libGL.so is being picked by doing ldd progs/trivial/tri -== Traceing == +== Tracing == -For traceing then do +For tracing then do - export XMESA_TRACE=y GALLIUM_TRACE=tri.trace progs/trivial/tri which should create a tri.trace file, which is an XML file. You can view copying diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 8008b596c37..5a9f0fc6901 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -52,6 +52,7 @@ trace_buffer_unwrap(struct trace_context *tr_ctx, assert(tr_buf->buffer); assert(tr_buf->buffer->screen == tr_scr->screen); + (void) tr_scr; return tr_buf->buffer; } @@ -90,6 +91,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx, assert(tr_surf->surface); assert(tr_surf->surface->texture->screen == tr_scr->screen); + (void) tr_scr; return tr_surf->surface; } @@ -159,16 +161,15 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) pipe_mutex_unlock(tr_ctx->draw_mutex); } -static INLINE boolean +static INLINE void trace_context_draw_arrays(struct pipe_context *_pipe, unsigned mode, unsigned start, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -179,19 +180,15 @@ trace_context_draw_arrays(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_arrays(pipe, mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_arrays(pipe, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -201,10 +198,9 @@ trace_context_draw_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -219,19 +215,15 @@ trace_context_draw_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -245,10 +237,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -265,18 +256,14 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, minIndex, maxIndex, - mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, minIndex, maxIndex, + mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 7e2ccbcfdc5..0f45e211a32 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -40,7 +40,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) #include <stdlib.h> #endif @@ -258,7 +258,7 @@ boolean trace_dump_trace_begin() trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); trace_dump_writes("<trace version='0.1'>\n"); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) /* Linux applications rarely cleanup GL / Gallium resources so catch * application exit here */ atexit(trace_dump_trace_close); diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 5794c902988..32f61f8c944 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -409,7 +409,7 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state) trace_dump_member(uint, state, min_img_filter); trace_dump_member(uint, state, min_mip_filter); trace_dump_member(uint, state, mag_img_filter); - trace_dump_member(bool, state, compare_mode); + trace_dump_member(uint, state, compare_mode); trace_dump_member(uint, state, compare_func); trace_dump_member(bool, state, normalized_coords); trace_dump_member(uint, state, prefilter); diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index c31b1d86986..0546aad9b50 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -45,7 +45,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define sleep Sleep -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_APPLE) void usleep(int); # define sleep usleep #else @@ -180,7 +180,7 @@ static int trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, uint32_t serial) { struct trace_screen *tr_scr = tr_rbug->tr_scr; - struct trace_texture *tr_tex; + struct trace_texture *tr_tex = NULL; struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info *)header; struct tr_list *ptr; struct pipe_texture *t; @@ -223,7 +223,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, struct rbug_proto_texture_read *gptr = (struct rbug_proto_texture_read *)header; struct trace_screen *tr_scr = tr_rbug->tr_scr; - struct trace_texture *tr_tex; + struct trace_texture *tr_tex = NULL; struct tr_list *ptr; struct pipe_screen *screen = tr_scr->screen; diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index ac20a47af1e..117503aaff6 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -426,7 +426,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen, struct pipe_transfer *transfer = tr_trans->transfer; if(tr_trans->map) { - size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride; + size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride; trace_dump_call_begin("pipe_screen", "transfer_write"); diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h index 1c16042ee5a..e2f981d0513 100644 --- a/src/gallium/drivers/trace/tr_state.h +++ b/src/gallium/drivers/trace/tr_state.h @@ -32,7 +32,7 @@ struct tgsi_token; enum trace_shader_type { TRACE_SHADER_FRAGMENT = 0, TRACE_SHADER_VERTEX = 1, - TRACE_SHADER_GEOMETRY = 2, + TRACE_SHADER_GEOMETRY = 2 }; struct trace_shader diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index f7368bb95b3..18ebd0c9483 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -52,45 +52,15 @@ #endif /* _MSC_VER */ -#if defined(_MSC_VER) - -typedef __int8 int8_t; -typedef unsigned __int8 uint8_t; -typedef __int16 int16_t; -typedef unsigned __int16 uint16_t; -#ifndef __eglplatform_h_ -typedef __int32 int32_t; -#endif -typedef unsigned __int32 uint32_t; -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; - -#if defined(_WIN64) -typedef __int64 intptr_t; -typedef unsigned __int64 uintptr_t; -#else -typedef __int32 intptr_t; -typedef unsigned __int32 uintptr_t; -#endif - -#define INT64_C(__val) __val##i64 -#define UINT64_C(__val) __val##ui64 - -#ifndef __cplusplus -#define false 0 -#define true 1 -#define bool _Bool -typedef int _Bool; -#define __bool_true_false_are_defined 1 -#endif /* !__cplusplus */ - -#else +/* + * Alternative stdint.h and stdbool.h headers are supplied in include/c99 for + * systems that lack it. + */ #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS 1 #endif #include <stdint.h> #include <stdbool.h> -#endif #ifndef __HAIKU__ @@ -99,11 +69,7 @@ typedef unsigned short ushort; #endif typedef unsigned char ubyte; -#if 0 -#define boolean bool -#else typedef unsigned char boolean; -#endif #ifndef TRUE #define TRUE true #endif @@ -135,6 +101,17 @@ typedef unsigned char boolean; # endif #endif + +/* Function visibility */ +#ifndef PUBLIC +# if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +# define PUBLIC __attribute__((visibility("default"))) +# else +# define PUBLIC +# endif +#endif + + /* The __FUNCTION__ gcc variable is generally only used for debugging. * If we're not using gcc, define __FUNCTION__ as a cpp symbol here. */ @@ -162,22 +139,33 @@ typedef unsigned char boolean; +/* Macros for data alignment. */ #if defined(__GNUC__) -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) -#define ALIGN16_ASSIGN(NAME) NAME##___aligned -#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) -#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Type-Attributes.html */ +#define PIPE_ALIGN_TYPE(_alignment, _type) _type __attribute__((aligned(_alignment))) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */ +#define PIPE_ALIGN_VAR(_alignment) __attribute__((aligned(_alignment))) + #if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) -#define ALIGN_STACK __attribute__((force_align_arg_pointer)) +#define PIPE_ALIGN_STACK __attribute__((force_align_arg_pointer)) #else -#define ALIGN_STACK +#define PIPE_ALIGN_STACK #endif + +#elif defined(_MSC_VER) + +/* See http://msdn.microsoft.com/en-us/library/83ythb65.aspx */ +#define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type +#define PIPE_ALIGN_VAR(_alignment) __declspec(align(_alignment)) + +#define PIPE_ALIGN_STACK + #else -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] -#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned) -#define ALIGN16_ATTRIB -#define ALIGN8_ATTRIB -#define ALIGN_STACK + +#error "Unsupported compiler" + #endif diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 9d19ec2f7fe..0b8f6da2f4a 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -61,29 +61,53 @@ struct pipe_context { * VBO drawing (return false on fallbacks (temporary??)) */ /*@{*/ - boolean (*draw_arrays)( struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); - - boolean (*draw_elements)( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); + void (*draw_arrays)( struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count); + + void (*draw_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + void (*draw_arrays_instanced)(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + void (*draw_elements_instanced)(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); /* XXX: this is (probably) a temporary entrypoint, as the range * information should be available from the vertex_buffer state. * Using this to quickly evaluate a specialized path in the draw * module. */ - boolean (*draw_range_elements)( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); + void (*draw_range_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); /*@}*/ + /** + * Predicate subsequent rendering on occlusion query result + * \param query the query predicate, or NULL if no predicate + * \param mode one of PIPE_COND_RENDER_x + */ + void (*render_condition)( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ); /** * Query objects @@ -142,6 +166,12 @@ struct pipe_context { const struct pipe_shader_state *); void (*bind_vs_state)(struct pipe_context *, void *); void (*delete_vs_state)(struct pipe_context *, void *); + + void * (*create_gs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*bind_gs_state)(struct pipe_context *, void *); + void (*delete_gs_state)(struct pipe_context *, void *); + /*@}*/ /** diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index fe1390d765f..35f3830ebcf 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -171,8 +171,6 @@ enum pipe_texture_target { */ #define PIPE_TEX_FILTER_NEAREST 0 #define PIPE_TEX_FILTER_LINEAR 1 -#define PIPE_TEX_FILTER_ANISO 2 - #define PIPE_TEX_COMPARE_NONE 0 #define PIPE_TEX_COMPARE_R_TO_TEXTURE 1 @@ -321,23 +319,28 @@ enum pipe_transfer_usage { */ #define PIPE_SHADER_VERTEX 0 #define PIPE_SHADER_FRAGMENT 1 -#define PIPE_SHADER_TYPES 2 +#define PIPE_SHADER_GEOMETRY 2 +#define PIPE_SHADER_TYPES 3 /** * Primitive types: */ -#define PIPE_PRIM_POINTS 0 -#define PIPE_PRIM_LINES 1 -#define PIPE_PRIM_LINE_LOOP 2 -#define PIPE_PRIM_LINE_STRIP 3 -#define PIPE_PRIM_TRIANGLES 4 -#define PIPE_PRIM_TRIANGLE_STRIP 5 -#define PIPE_PRIM_TRIANGLE_FAN 6 -#define PIPE_PRIM_QUADS 7 -#define PIPE_PRIM_QUAD_STRIP 8 -#define PIPE_PRIM_POLYGON 9 -#define PIPE_PRIM_MAX 10 +#define PIPE_PRIM_POINTS 0 +#define PIPE_PRIM_LINES 1 +#define PIPE_PRIM_LINE_LOOP 2 +#define PIPE_PRIM_LINE_STRIP 3 +#define PIPE_PRIM_TRIANGLES 4 +#define PIPE_PRIM_TRIANGLE_STRIP 5 +#define PIPE_PRIM_TRIANGLE_FAN 6 +#define PIPE_PRIM_QUADS 7 +#define PIPE_PRIM_QUAD_STRIP 8 +#define PIPE_PRIM_POLYGON 9 +#define PIPE_PRIM_LINES_ADJACENCY 10 +#define PIPE_PRIM_LINE_STRIP_ADJACENCY 11 +#define PIPE_PRIM_TRIANGLES_ADJACENCY 12 +#define PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY 13 +#define PIPE_PRIM_MAX 14 /** @@ -350,6 +353,15 @@ enum pipe_transfer_usage { /** + * Conditional rendering modes + */ +#define PIPE_RENDER_COND_WAIT 0 +#define PIPE_RENDER_COND_NO_WAIT 1 +#define PIPE_RENDER_COND_BY_REGION_WAIT 2 +#define PIPE_RENDER_COND_BY_REGION_NO_WAIT 3 + + +/** * Point sprite coord modes */ #define PIPE_SPRITE_COORD_NONE 0 diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index f0a4de5df33..b8e001a6b01 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -266,6 +266,11 @@ struct pipe_screen { void (*video_surface_destroy)( struct pipe_video_surface *vsfc ); + /** + * Do any special operations to ensure buffer size is correct + */ + void (*update_buffer)( struct pipe_screen *ws, + void *context_private ); /** * Do any special operations to ensure frontbuffer contents are diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 5da85bbbc24..b489b044667 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -121,16 +121,18 @@ struct tgsi_declaration_range unsigned Last : 16; /**< UINT */ }; -#define TGSI_SEMANTIC_POSITION 0 -#define TGSI_SEMANTIC_COLOR 1 -#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ -#define TGSI_SEMANTIC_FOG 3 -#define TGSI_SEMANTIC_PSIZE 4 -#define TGSI_SEMANTIC_GENERIC 5 -#define TGSI_SEMANTIC_NORMAL 6 -#define TGSI_SEMANTIC_FACE 7 -#define TGSI_SEMANTIC_EDGEFLAG 8 -#define TGSI_SEMANTIC_COUNT 9 /**< number of semantic values */ +#define TGSI_SEMANTIC_POSITION 0 +#define TGSI_SEMANTIC_COLOR 1 +#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ +#define TGSI_SEMANTIC_FOG 3 +#define TGSI_SEMANTIC_PSIZE 4 +#define TGSI_SEMANTIC_GENERIC 5 +#define TGSI_SEMANTIC_NORMAL 6 +#define TGSI_SEMANTIC_FACE 7 +#define TGSI_SEMANTIC_EDGEFLAG 8 +#define TGSI_SEMANTIC_PRIMID 9 +#define TGSI_SEMANTIC_INSTANCEID 10 +#define TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */ struct tgsi_declaration_semantic { @@ -140,6 +142,8 @@ struct tgsi_declaration_semantic }; #define TGSI_IMM_FLOAT32 0 +#define TGSI_IMM_UINT32 1 +#define TGSI_IMM_INT32 2 struct tgsi_immediate { @@ -152,6 +156,8 @@ struct tgsi_immediate union tgsi_immediate_data { float Float; + unsigned Uint; + int Int; }; #define TGSI_PROPERTY_GS_INPUT_PRIM 0 @@ -263,7 +269,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_NOT 85 #define TGSI_OPCODE_TRUNC 86 #define TGSI_OPCODE_SHL 87 -#define TGSI_OPCODE_SHR 88 + /* gap */ #define TGSI_OPCODE_AND 89 #define TGSI_OPCODE_OR 90 #define TGSI_OPCODE_MOD 91 @@ -288,7 +294,33 @@ struct tgsi_property_data { #define TGSI_OPCODE_KIL 116 /* conditional kill */ #define TGSI_OPCODE_END 117 /* aka HALT */ /* gap */ -#define TGSI_OPCODE_LAST 119 +#define TGSI_OPCODE_F2I 119 +#define TGSI_OPCODE_IDIV 120 +#define TGSI_OPCODE_IMAX 121 +#define TGSI_OPCODE_IMIN 122 +#define TGSI_OPCODE_INEG 123 +#define TGSI_OPCODE_ISGE 124 +#define TGSI_OPCODE_ISHR 125 +#define TGSI_OPCODE_ISLT 126 +#define TGSI_OPCODE_F2U 127 +#define TGSI_OPCODE_U2F 128 +#define TGSI_OPCODE_UADD 129 +#define TGSI_OPCODE_UDIV 130 +#define TGSI_OPCODE_UMAD 131 +#define TGSI_OPCODE_UMAX 132 +#define TGSI_OPCODE_UMIN 133 +#define TGSI_OPCODE_UMOD 134 +#define TGSI_OPCODE_UMUL 135 +#define TGSI_OPCODE_USEQ 136 +#define TGSI_OPCODE_USGE 137 +#define TGSI_OPCODE_USHR 138 +#define TGSI_OPCODE_USLT 139 +#define TGSI_OPCODE_USNE 140 +#define TGSI_OPCODE_SWITCH 141 +#define TGSI_OPCODE_CASE 142 +#define TGSI_OPCODE_DEFAULT 143 +#define TGSI_OPCODE_ENDSWITCH 144 +#define TGSI_OPCODE_LAST 145 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index f3d580d8d6d..fdd29ed4492 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -66,10 +66,6 @@ extern "C" { #define PIPE_MAX_TEXTURE_LEVELS 16 -/* fwd decls */ -struct pipe_surface; - - /** * The driver will certainly subclass this to include actual memory * management information. @@ -367,6 +363,11 @@ struct pipe_vertex_element /** Offset of this attribute, in bytes, from the start of the vertex */ unsigned src_offset; + /** Instance data rate divisor. 0 means this is per-vertex data, + * n means per-instance data used for n consecutive instances (n > 0). + */ + unsigned instance_divisor; + /** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does * this attribute live in? */ diff --git a/src/gallium/include/state_tracker/drm_api.h b/src/gallium/include/state_tracker/drm_api.h index 4d1259e1ee7..bb928928c90 100644 --- a/src/gallium/include/state_tracker/drm_api.h +++ b/src/gallium/include/state_tracker/drm_api.h @@ -28,6 +28,8 @@ struct drm_create_screen_arg { struct drm_api { + const char *name; + /** * Special buffer functions */ diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c index 8819936fcaf..f2e5f3fb23c 100644 --- a/src/gallium/state_trackers/dri/dri_context.c +++ b/src/gallium/state_trackers/dri/dri_context.c @@ -44,9 +44,9 @@ GLboolean dri_create_context(const __GLcontextModes * visual, - __DRIcontextPrivate * cPriv, void *sharedContextPrivate) + __DRIcontext * cPriv, void *sharedContextPrivate) { - __DRIscreenPrivate *sPriv = cPriv->driScreenPriv; + __DRIscreen *sPriv = cPriv->driScreenPriv; struct dri_screen *screen = dri_screen(sPriv); struct dri_context *ctx = NULL; struct st_context *st_share = NULL; @@ -97,7 +97,7 @@ dri_create_context(const __GLcontextModes * visual, } void -dri_destroy_context(__DRIcontextPrivate * cPriv) +dri_destroy_context(__DRIcontext * cPriv) { struct dri_context *ctx = dri_context(cPriv); @@ -116,7 +116,7 @@ dri_destroy_context(__DRIcontextPrivate * cPriv) } GLboolean -dri_unbind_context(__DRIcontextPrivate * cPriv) +dri_unbind_context(__DRIcontext * cPriv) { if (cPriv) { struct dri_context *ctx = dri_context(cPriv); @@ -133,9 +133,9 @@ dri_unbind_context(__DRIcontextPrivate * cPriv) } GLboolean -dri_make_current(__DRIcontextPrivate * cPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) +dri_make_current(__DRIcontext * cPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) { if (cPriv) { struct dri_context *ctx = dri_context(cPriv); diff --git a/src/gallium/state_trackers/dri/dri_context.h b/src/gallium/state_trackers/dri/dri_context.h index 46501787340..13f497462f7 100644 --- a/src/gallium/state_trackers/dri/dri_context.h +++ b/src/gallium/state_trackers/dri/dri_context.h @@ -44,10 +44,10 @@ struct dri_drawable; struct dri_context { /* dri */ - __DRIscreenPrivate *sPriv; - __DRIcontextPrivate *cPriv; - __DRIdrawablePrivate *dPriv; - __DRIdrawablePrivate *rPriv; + __DRIscreen *sPriv; + __DRIcontext *cPriv; + __DRIdrawable *dPriv; + __DRIdrawable *rPriv; driOptionCache optionCache; @@ -67,7 +67,7 @@ struct dri_context }; static INLINE struct dri_context * -dri_context(__DRIcontextPrivate * driContextPriv) +dri_context(__DRIcontext * driContextPriv) { return (struct dri_context *)driContextPriv->driverPrivate; } @@ -99,18 +99,18 @@ dri_unlock(struct dri_context *ctx) */ extern struct dri1_api_lock_funcs dri1_lf; -void dri_destroy_context(__DRIcontextPrivate * driContextPriv); +void dri_destroy_context(__DRIcontext * driContextPriv); -boolean dri_unbind_context(__DRIcontextPrivate * driContextPriv); +boolean dri_unbind_context(__DRIcontext * driContextPriv); boolean -dri_make_current(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv); +dri_make_current(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv); boolean dri_create_context(const __GLcontextModes * visual, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); /*********************************************************************** diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index 4b12243ddff..0fdfa96b35a 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -118,7 +118,7 @@ dri2_check_if_pixmap(__DRIbuffer *buffers, int count) * This will be called a drawable is known to have been resized. */ void -dri_get_buffers(__DRIdrawablePrivate * dPriv) +dri_get_buffers(__DRIdrawable * dPriv) { struct dri_drawable *drawable = dri_drawable(dPriv); @@ -180,6 +180,7 @@ dri_get_buffers(__DRIdrawablePrivate * dPriv) switch (buffers[i].attachment) { case __DRI_BUFFER_FRONT_LEFT: + continue; case __DRI_BUFFER_FAKE_FRONT_LEFT: index = ST_SURFACE_FRONT_LEFT; format = drawable->color_format; @@ -268,6 +269,14 @@ void dri2_set_tex_buffer(__DRIcontext *pDRICtx, GLint target, } void +dri_update_buffer(struct pipe_screen *screen, void *context_private) +{ + struct dri_context *ctx = (struct dri_context *)context_private; + + dri_get_buffers(ctx->dPriv); +} + +void dri_flush_frontbuffer(struct pipe_screen *screen, struct pipe_surface *surf, void *context_private) { @@ -299,8 +308,8 @@ dri_flush_frontbuffer(struct pipe_screen *screen, * This is called when we need to set up GL rendering to a new X window. */ boolean -dri_create_buffer(__DRIscreenPrivate * sPriv, - __DRIdrawablePrivate * dPriv, +dri_create_buffer(__DRIscreen * sPriv, + __DRIdrawable * dPriv, const __GLcontextModes * visual, boolean isPixmap) { struct dri_screen *screen = sPriv->private; @@ -364,6 +373,7 @@ dri_create_buffer(__DRIscreenPrivate * sPriv, /* TODO incase of double buffer visual, delay fake creation */ i = 0; drawable->attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + drawable->attachments[i++] = __DRI_BUFFER_FAKE_FRONT_LEFT; if (visual->doubleBufferMode) drawable->attachments[i++] = __DRI_BUFFER_BACK_LEFT; @@ -416,7 +426,7 @@ dri_swap_fences_push_back(struct dri_drawable *draw, } void -dri_destroy_buffer(__DRIdrawablePrivate * dPriv) +dri_destroy_buffer(__DRIdrawable * dPriv) { struct dri_drawable *drawable = dri_drawable(dPriv); struct pipe_fence_handle *fence; @@ -434,8 +444,8 @@ dri_destroy_buffer(__DRIdrawablePrivate * dPriv) static void dri1_update_drawables_locked(struct dri_context *ctx, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) { if (ctx->stLostLock) { ctx->stLostLock = FALSE; @@ -458,8 +468,8 @@ dri1_update_drawables_locked(struct dri_context *ctx, static void dri1_propagate_drawable_change(struct dri_context *ctx) { - __DRIdrawablePrivate *dPriv = ctx->dPriv; - __DRIdrawablePrivate *rPriv = ctx->rPriv; + __DRIdrawable *dPriv = ctx->dPriv; + __DRIdrawable *rPriv = ctx->rPriv; boolean flushed = FALSE; if (dPriv && ctx->d_stamp != dPriv->lastStamp) { @@ -532,7 +542,7 @@ static void dri1_swap_copy(struct dri_context *ctx, struct pipe_surface *dst, struct pipe_surface *src, - __DRIdrawablePrivate * dPriv, const struct drm_clip_rect *bbox) + __DRIdrawable * dPriv, const struct drm_clip_rect *bbox) { struct pipe_context *pipe = ctx->pipe; struct drm_clip_rect clip; @@ -563,7 +573,7 @@ dri1_swap_copy(struct dri_context *ctx, static void dri1_copy_to_front(struct dri_context *ctx, struct pipe_surface *surf, - __DRIdrawablePrivate * dPriv, + __DRIdrawable * dPriv, const struct drm_clip_rect *sub_box, struct pipe_fence_handle **fence) { @@ -636,7 +646,7 @@ dri1_flush_frontbuffer(struct pipe_screen *screen, } void -dri_swap_buffers(__DRIdrawablePrivate * dPriv) +dri_swap_buffers(__DRIdrawable * dPriv) { struct dri_context *ctx; struct pipe_surface *back_surf; @@ -668,7 +678,7 @@ dri_swap_buffers(__DRIdrawablePrivate * dPriv) } void -dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) +dri_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h) { struct pipe_screen *screen = dri_screen(dPriv->driScreenPriv)->pipe_screen; struct drm_clip_rect sub_bbox; diff --git a/src/gallium/state_trackers/dri/dri_drawable.h b/src/gallium/state_trackers/dri/dri_drawable.h index b910930db42..8bc59cb4c3d 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.h +++ b/src/gallium/state_trackers/dri/dri_drawable.h @@ -41,8 +41,8 @@ struct dri_context; struct dri_drawable { /* dri */ - __DRIdrawablePrivate *dPriv; - __DRIscreenPrivate *sPriv; + __DRIdrawable *dPriv; + __DRIscreen *sPriv; unsigned attachments[8]; unsigned num_attachments; @@ -67,7 +67,7 @@ struct dri_drawable }; static INLINE struct dri_drawable * -dri_drawable(__DRIdrawablePrivate * driDrawPriv) +dri_drawable(__DRIdrawable * driDrawPriv) { return (struct dri_drawable *)driDrawPriv->driverPrivate; } @@ -76,22 +76,25 @@ dri_drawable(__DRIdrawablePrivate * driDrawPriv) * dri_drawable.c */ boolean -dri_create_buffer(__DRIscreenPrivate * sPriv, - __DRIdrawablePrivate * dPriv, +dri_create_buffer(__DRIscreen * sPriv, + __DRIdrawable * dPriv, const __GLcontextModes * visual, boolean isPixmap); void +dri_update_buffer(struct pipe_screen *screen, void *context_private); + +void dri_flush_frontbuffer(struct pipe_screen *screen, struct pipe_surface *surf, void *context_private); -void dri_swap_buffers(__DRIdrawablePrivate * dPriv); +void dri_swap_buffers(__DRIdrawable * dPriv); void -dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h); +dri_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h); -void dri_get_buffers(__DRIdrawablePrivate * dPriv); +void dri_get_buffers(__DRIdrawable * dPriv); -void dri_destroy_buffer(__DRIdrawablePrivate * dPriv); +void dri_destroy_buffer(__DRIdrawable * dPriv); void dri2_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv); diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index cb864d45d51..793db087ee1 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -202,7 +202,7 @@ dri_fill_in_modes(struct dri_screen *screen, * Get information about previous buffer swaps. */ static int -dri_get_swap_info(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo) +dri_get_swap_info(__DRIdrawable * dPriv, __DRIswapInfo * sInfo) { if (dPriv == NULL || dPriv->driverPrivate == NULL || sInfo == NULL) return -1; @@ -220,7 +220,7 @@ dri_copy_version(struct dri1_api_version *dst, } static const __DRIconfig ** -dri_init_screen(__DRIscreenPrivate * sPriv) +dri_init_screen(__DRIscreen * sPriv) { struct dri_screen *screen; const __DRIconfig **configs; @@ -285,7 +285,7 @@ dri_init_screen(__DRIscreenPrivate * sPriv) * Returns the __GLcontextModes supported by this driver. */ static const __DRIconfig ** -dri_init_screen2(__DRIscreenPrivate * sPriv) +dri_init_screen2(__DRIscreen * sPriv) { struct dri_screen *screen; struct drm_create_screen_arg arg; @@ -308,6 +308,7 @@ dri_init_screen2(__DRIscreenPrivate * sPriv) } /* We need to hook in here */ + screen->pipe_screen->update_buffer = dri_update_buffer; screen->pipe_screen->flush_frontbuffer = dri_flush_frontbuffer; driParseOptionInfo(&screen->optionCache, @@ -319,7 +320,7 @@ dri_init_screen2(__DRIscreenPrivate * sPriv) } static void -dri_destroy_screen(__DRIscreenPrivate * sPriv) +dri_destroy_screen(__DRIscreen * sPriv) { struct dri_screen *screen = dri_screen(sPriv); @@ -346,4 +347,12 @@ PUBLIC const struct __DriverAPIRec driDriverAPI = { .InitScreen2 = dri_init_screen2, }; +/* This is the table of extensions that the loader will dlsym() for. */ +PUBLIC const __DRIextension *__driDriverExtensions[] = { + &driCoreExtension.base, + &driLegacyExtension.base, + &driDRI2Extension.base, + NULL +}; + /* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/src/gallium/state_trackers/dri/dri_screen.h b/src/gallium/state_trackers/dri/dri_screen.h index f6c56d0f0c5..03387a0e813 100644 --- a/src/gallium/state_trackers/dri/dri_screen.h +++ b/src/gallium/state_trackers/dri/dri_screen.h @@ -42,7 +42,7 @@ struct dri_screen { /* dri */ - __DRIscreenPrivate *sPriv; + __DRIscreen *sPriv; /** * Configuration cache with default values for all contexts @@ -63,7 +63,7 @@ struct dri_screen /** cast wrapper */ static INLINE struct dri_screen * -dri_screen(__DRIscreenPrivate * sPriv) +dri_screen(__DRIscreen * sPriv) { return (struct dri_screen *)sPriv->private; } diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c index 0b60b5be059..d55aa51b82d 100644 --- a/src/gallium/state_trackers/egl/egl_surface.c +++ b/src/gallium/state_trackers/egl/egl_surface.c @@ -171,9 +171,9 @@ drm_takedown_shown_screen(_EGLDisplay *dpy, struct drm_screen *screen) drmModeSetCrtc( dev->drmFD, screen->crtcID, - 0, // FD + 0, /* FD */ 0, 0, - NULL, 0, // List of output ids + NULL, 0, /* List of output ids */ NULL); drmModeRmFB(dev->drmFD, screen->fbID); diff --git a/src/gallium/state_trackers/egl/egl_tracker.c b/src/gallium/state_trackers/egl/egl_tracker.c index 745803c7eb0..9345b0f4908 100644 --- a/src/gallium/state_trackers/egl/egl_tracker.c +++ b/src/gallium/state_trackers/egl/egl_tracker.c @@ -152,6 +152,7 @@ drm_initialize(_EGLDriver *drv, _EGLDisplay *disp, EGLint *major, EGLint *minor) int num_screens = 0; EGLint i; int fd; + _EGLConfig *config; dev = (struct drm_device *) calloc(1, sizeof(struct drm_device)); if (!dev) @@ -206,7 +207,7 @@ drm_initialize(_EGLDriver *drv, _EGLDisplay *disp, EGLint *major, EGLint *minor) disp->DriverData = dev; /* for now we only have one config */ - _EGLConfig *config = calloc(1, sizeof(*config)); + config = calloc(1, sizeof(*config)); memset(config, 1, sizeof(*config)); _eglInitConfig(config, 1); _eglSetConfigAttrib(config, EGL_RED_SIZE, 8); diff --git a/src/gallium/state_trackers/egl_g3d/Makefile b/src/gallium/state_trackers/egl_g3d/Makefile new file mode 100644 index 00000000000..213eb3e815b --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/Makefile @@ -0,0 +1,72 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +common_INCLUDES = \ + -I. \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/include + +common_SOURCES = $(wildcard common/*.c) +common_OBJECTS = $(common_SOURCES:.c=.o) + + +x11_INCLUDES = \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/glx/x11 \ + -I$(TOP)/src/mesa \ + $(shell pkg-config --cflags-only-I libdrm) + +x11_SOURCES = $(wildcard x11/*.c) $(TOP)/src/glx/x11/dri2.c +x11_OBJECTS = $(x11_SOURCES:.c=.o) + + +kms_INCLUDES = $(shell pkg-config --cflags-only-I libdrm) +kms_SOURCES = $(wildcard kms/*.c) +kms_OBJECTS = $(kms_SOURCES:.c=.o) + + +ALL_INCLUDES = $(common_INCLUDES) $(x11_INCLUDES) $(kms_INCLUDES) +ALL_SOURCES = $(common_SOURCES) $(x11_SOURCES) $(kms_SOURCES) +ALL_OBJECTS = $(common_OBJECTS) $(x11_OBJECTS) $(kms_OBJECTS) + +##### TARGETS ##### + +EGL_DISPLAYS_MODS = $(foreach dpy, $(EGL_DISPLAYS), libegl$(dpy).a) + +default: depend $(EGL_DISPLAYS_MODS) + + +libeglx11.a: $(x11_OBJECTS) $(common_OBJECTS) Makefile + $(MKLIB) -o eglx11 -static $(x11_OBJECTS) $(common_OBJECTS) + +libeglkms.a: $(kms_OBJECTS) $(common_OBJECTS) Makefile + $(MKLIB) -o eglkms -static $(kms_OBJECTS) $(common_OBJECTS) + +depend: + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(ALL_INCLUDES) $(ALL_SOURCES) 2> /dev/null + +clean: + rm -f $(ALL_OBJECTS) + rm -f $(EGL_DISPLAYS_MODS) + rm -f depend depend.bak + +# Dummy target +install: + @echo -n "" + +##### RULES ##### + +$(common_OBJECTS): %.o: %.c + $(CC) -c $(common_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ + +$(x11_OBJECTS): %.o: %.c + $(CC) -c $(common_INCLUDES) $(x11_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ + +$(kms_OBJECTS): %.o: %.c + $(CC) -c $(common_INCLUDES) $(kms_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ + +sinclude depend diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c new file mode 100644 index 00000000000..51da8e19f5e --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c @@ -0,0 +1,1147 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <string.h> +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "egldriver.h" +#include "eglcurrent.h" +#include "eglconfigutil.h" +#include "egllog.h" + +#include "native.h" +#include "egl_g3d.h" +#include "egl_st.h" + +/** + * Validate the draw/read surfaces of the context. + */ +static void +egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct pipe_screen *screen = gdpy->native->screen; + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + EGLint num_surfaces; + EGLint s, i; + + /* validate draw and/or read buffers */ + num_surfaces = (gctx->base.ReadSurface == gctx->base.DrawSurface) ? 1 : 2; + for (s = 0; s < num_surfaces; s++) { + struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; + struct egl_g3d_surface *gsurf; + struct egl_g3d_buffer *gbuf; + + if (s == 0) { + gsurf = egl_g3d_surface(gctx->base.DrawSurface); + gbuf = &gctx->draw; + } + else { + gsurf = egl_g3d_surface(gctx->base.ReadSurface); + gbuf = &gctx->read; + } + + if (!gctx->force_validate) { + unsigned int seq_num; + + gsurf->native->validate(gsurf->native, + gbuf->native_atts, gbuf->num_atts, + &seq_num, NULL, NULL, NULL); + /* skip validation */ + if (gsurf->sequence_number == seq_num) + continue; + } + + gsurf->native->validate(gsurf->native, + gbuf->native_atts, gbuf->num_atts, + &gsurf->sequence_number, textures, + &gsurf->base.Width, &gsurf->base.Height); + for (i = 0; i < gbuf->num_atts; i++) { + struct pipe_texture *pt = textures[i]; + struct pipe_surface *ps; + + if (pt) { + ps = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + gctx->stapi->st_set_framebuffer_surface(gbuf->st_fb, + gbuf->st_atts[i], ps); + + if (gbuf->native_atts[i] == gsurf->render_att) + pipe_surface_reference(&gsurf->render_surface, ps); + + pipe_surface_reference(&ps, NULL); + pipe_texture_reference(&pt, NULL); + } + } + + gctx->stapi->st_resize_framebuffer(gbuf->st_fb, + gsurf->base.Width, gsurf->base.Height); + } + + gctx->force_validate = EGL_FALSE; + +} + +/** + * Create a st_framebuffer. + */ +static struct st_framebuffer * +create_framebuffer(_EGLContext *ctx, _EGLSurface *surf) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + struct egl_g3d_config *gconf = egl_g3d_config(gsurf->base.Config); + + return gctx->stapi->st_create_framebuffer(&gconf->native->mode, + gconf->native->color_format, gconf->native->depth_format, + gconf->native->stencil_format, + gsurf->base.Width, gsurf->base.Height, &gsurf->base); +} + +/** + * Update the attachments of draw/read surfaces. + */ +static void +egl_g3d_route_context(_EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + const uint st_att_map[NUM_NATIVE_ATTACHMENTS] = { + ST_SURFACE_FRONT_LEFT, + ST_SURFACE_BACK_LEFT, + ST_SURFACE_FRONT_RIGHT, + ST_SURFACE_BACK_RIGHT, + }; + EGLint s, i; + + /* route draw and read buffers' attachments */ + for (s = 0; s < 2; s++) { + struct egl_g3d_surface *gsurf; + struct egl_g3d_buffer *gbuf; + + if (s == 0) { + gsurf = egl_g3d_surface(gctx->base.DrawSurface); + gbuf = &gctx->draw; + } + else { + gsurf = egl_g3d_surface(gctx->base.ReadSurface); + gbuf = &gctx->read; + } + + gbuf->native_atts[0] = gsurf->render_att; + gbuf->num_atts = 1; + + for (i = 0; i < gbuf->num_atts; i++) + gbuf->st_atts[i] = st_att_map[gbuf->native_atts[i]]; + + /* FIXME OpenGL defaults to draw the front or back buffer when the + * context is single-buffered or double-buffered respectively. In EGL, + * however, the buffer to be drawn is determined by the surface, instead + * of the context. As a result, rendering to a pixmap surface with a + * double-buffered context does not work as expected. + * + * gctx->stapi->st_draw_front_buffer(gctx->st_ctx, natt == + * NATIVE_ATTACHMENT_FRONT_LEFT); + */ + + /* + * FIXME If the back buffer is asked for here, and the front buffer is + * later needed by the client API (e.g. glDrawBuffer is called to draw + * the front buffer), it will create a new pipe texture and draw there. + * One fix is to ask for both buffers here, but it would be a waste if + * the front buffer is never used. A better fix is to add a callback to + * the pipe screen with context private (just like flush_frontbuffer). + */ + } +} + +/** + * Reallocate the context's framebuffers after draw/read surfaces change. + */ +static EGLBoolean +egl_g3d_realloc_context(_EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + struct egl_g3d_surface *gdraw = egl_g3d_surface(gctx->base.DrawSurface); + struct egl_g3d_surface *gread = egl_g3d_surface(gctx->base.ReadSurface); + + /* unreference the old framebuffers */ + if (gctx->draw.st_fb) { + EGLBoolean is_equal = (gctx->draw.st_fb == gctx->read.st_fb); + void *priv; + + priv = gctx->stapi->st_framebuffer_private(gctx->draw.st_fb); + if (!gdraw || priv != (void *) &gdraw->base) { + gctx->stapi->st_unreference_framebuffer(gctx->draw.st_fb); + gctx->draw.st_fb = NULL; + gctx->draw.num_atts = 0; + } + + if (is_equal) { + gctx->read.st_fb = NULL; + gctx->draw.num_atts = 0; + } + else { + priv = gctx->stapi->st_framebuffer_private(gctx->read.st_fb); + if (!gread || priv != (void *) &gread->base) { + gctx->stapi->st_unreference_framebuffer(gctx->read.st_fb); + gctx->read.st_fb = NULL; + gctx->draw.num_atts = 0; + } + } + } + + if (!gdraw) + return EGL_TRUE; + + /* create the draw fb */ + if (!gctx->draw.st_fb) { + gctx->draw.st_fb = create_framebuffer(&gctx->base, &gdraw->base); + if (!gctx->draw.st_fb) + return EGL_FALSE; + } + + /* create the read fb */ + if (!gctx->read.st_fb) { + if (gread != gdraw) { + gctx->read.st_fb = create_framebuffer(&gctx->base, &gread->base); + if (!gctx->read.st_fb) { + gctx->stapi->st_unreference_framebuffer(gctx->draw.st_fb); + gctx->draw.st_fb = NULL; + return EGL_FALSE; + } + } + else { + /* there is no st_reference_framebuffer... */ + gctx->read.st_fb = gctx->draw.st_fb; + } + } + + egl_g3d_route_context(dpy, &gctx->base); + gctx->force_validate = EGL_TRUE; + + return EGL_TRUE; +} + +/** + * Return the current context of the given API. + */ +static struct egl_g3d_context * +egl_g3d_get_current_context(EGLint api) +{ + _EGLThreadInfo *t = _eglGetCurrentThread(); + EGLint api_index = _eglConvertApiToIndex(api); + return egl_g3d_context(t->CurrentContexts[api_index]); +} + +/** + * Return the state tracker for the given context. + */ +static const struct egl_g3d_st * +egl_g3d_choose_st(_EGLDriver *drv, _EGLContext *ctx) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + const struct egl_g3d_st *stapi; + EGLint idx = -1; + + switch (ctx->ClientAPI) { + case EGL_OPENGL_ES_API: + switch (ctx->ClientVersion) { + case 1: + idx = EGL_G3D_ST_OPENGL_ES; + break; + case 2: + idx = EGL_G3D_ST_OPENGL_ES2; + break; + default: + _eglLog(_EGL_WARNING, "unknown client version %d", + ctx->ClientVersion); + break; + } + break; + case EGL_OPENVG_API: + idx = EGL_G3D_ST_OPENVG; + break; + case EGL_OPENGL_API: + idx = EGL_G3D_ST_OPENGL; + break; + default: + _eglLog(_EGL_WARNING, "unknown client API 0x%04x", ctx->ClientAPI); + break; + } + + stapi = (idx >= 0) ? gdrv->stapis[idx] : NULL; + return stapi; +} + +/** + * Return an API mask that consists of the state trackers that supports the + * given mode. + * + * FIXME add st_is_mode_supported()? + */ +static EGLint +get_mode_api_mask(const __GLcontextModes *mode, EGLint api_mask) +{ + EGLint check; + + /* OpenGL ES 1.x and 2.x are checked together */ + check = EGL_OPENGL_ES_BIT | EGL_OPENGL_ES2_BIT; + if (api_mask & check) { + /* this is required by EGL, not by OpenGL ES */ + if (mode->drawableType & GLX_WINDOW_BIT && !mode->doubleBufferMode) + api_mask &= ~check; + } + + check = EGL_OPENVG_BIT; + if (api_mask & check) { + /* vega st needs the depth/stencil rb */ + if (!mode->depthBits && !mode->stencilBits) + api_mask &= ~check; + } + + return api_mask; +} + +#ifdef EGL_MESA_screen_surface + +static void +egl_g3d_add_screens(_EGLDriver *drv, _EGLDisplay *dpy) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + const struct native_connector **native_connectors; + EGLint num_connectors, i; + + native_connectors = + gdpy->native->modeset->get_connectors(gdpy->native, &num_connectors, NULL); + if (!num_connectors) { + if (native_connectors) + free(native_connectors); + return; + } + + for (i = 0; i < num_connectors; i++) { + const struct native_connector *nconn = native_connectors[i]; + struct egl_g3d_screen *gscr; + const struct native_mode **native_modes; + EGLint num_modes, j; + + /* TODO support for hotplug */ + native_modes = + gdpy->native->modeset->get_modes(gdpy->native, nconn, &num_modes); + if (!num_modes) { + if (native_modes) + free(native_modes); + continue; + } + + gscr = CALLOC_STRUCT(egl_g3d_screen); + if (!gscr) { + free(native_modes); + continue; + } + + _eglInitScreen(&gscr->base); + + for (j = 0; j < num_modes; j++) { + const struct native_mode *nmode = native_modes[j]; + _EGLMode *mode; + + mode = _eglAddNewMode(&gscr->base, nmode->width, nmode->height, + nmode->refresh_rate, nmode->desc); + if (!mode) + break; + /* gscr->native_modes and gscr->base.Modes should be consistent */ + assert(mode == &gscr->base.Modes[j]); + } + + gscr->native = nconn; + gscr->native_modes = native_modes; + + _eglAddScreen(dpy, &gscr->base); + } + + free(native_connectors); +} + +#endif /* EGL_MESA_screen_surface */ + +/** + * Add configs to display and return the next config ID. + */ +static EGLint +egl_g3d_add_configs(_EGLDriver *drv, _EGLDisplay *dpy, EGLint id) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + const struct native_config **native_configs; + int num_configs, i; + + native_configs = gdpy->native->get_configs(gdpy->native, + &num_configs); + if (!num_configs) { + if (native_configs) + free(native_configs); + return id; + } + + for (i = 0; i < num_configs; i++) { + EGLint api_mask; + struct egl_g3d_config *gconf; + EGLBoolean valid; + + api_mask = get_mode_api_mask(&native_configs[i]->mode, gdrv->api_mask); + if (!api_mask) { + _eglLog(_EGL_DEBUG, "no state tracker supports config 0x%x", + native_configs[i]->mode.visualID); + continue; + } + + gconf = CALLOC_STRUCT(egl_g3d_config); + if (!gconf) + continue; + + _eglInitConfig(&gconf->base, id); + valid = _eglConfigFromContextModesRec(&gconf->base, + &native_configs[i]->mode, api_mask, api_mask); + if (valid) { +#ifdef EGL_MESA_screen_surface + /* check if scanout surface bit is set */ + if (native_configs[i]->scanout_bit) { + EGLint val = GET_CONFIG_ATTRIB(&gconf->base, EGL_SURFACE_TYPE); + val |= EGL_SCREEN_BIT_MESA; + SET_CONFIG_ATTRIB(&gconf->base, EGL_SURFACE_TYPE, val); + } +#endif + valid = _eglValidateConfig(&gconf->base, EGL_FALSE); + } + if (!valid) { + _eglLog(_EGL_DEBUG, "skip invalid config 0x%x", + native_configs[i]->mode.visualID); + free(gconf); + continue; + } + + gconf->native = native_configs[i]; + _eglAddConfig(dpy, &gconf->base); + id++; + } + + free(native_configs); + return id; +} + +/** + * Flush the front buffer of the context's draw surface. + */ +static void +egl_g3d_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_surface *surf, void *context_private) +{ + struct egl_g3d_context *gctx = egl_g3d_context(context_private); + struct egl_g3d_surface *gsurf = egl_g3d_surface(gctx->base.DrawSurface); + + if (gsurf) + gsurf->native->flush_frontbuffer(gsurf->native); +} + +/** + * Re-validate the context. + */ +static void +egl_g3d_update_buffer(struct pipe_screen *screen, void *context_private) +{ + struct egl_g3d_context *gctx = egl_g3d_context(context_private); + + /** + * It is likely that the surface has changed when this function is called. + * Set force_validate to skip an unnecessary check. + */ + gctx->force_validate = EGL_TRUE; + egl_g3d_validate_context(gctx->base.Display, &gctx->base); +} + +static EGLBoolean +egl_g3d_terminate(_EGLDriver *drv, _EGLDisplay *dpy) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + EGLint i; + + _eglReleaseDisplayResources(drv, dpy); + _eglCleanupDisplay(dpy); + + if (dpy->Screens) { + for (i = 0; i < dpy->NumScreens; i++) { + struct egl_g3d_screen *gscr = egl_g3d_screen(dpy->Screens[i]); + free(gscr->native_modes); + free(gscr); + } + free(dpy->Screens); + } + + if (gdpy->native) + gdpy->native->destroy(gdpy->native); + + free(gdpy); + dpy->DriverData = NULL; + + return EGL_TRUE; +} + +static EGLBoolean +egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, + EGLint *major, EGLint *minor) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + struct egl_g3d_display *gdpy; + + gdpy = CALLOC_STRUCT(egl_g3d_display); + if (!gdpy) { + _eglError(EGL_BAD_ALLOC, "eglInitialize"); + goto fail; + } + dpy->DriverData = gdpy; + + gdpy->native = native_create_display(dpy->NativeDisplay); + if (!gdpy->native) { + _eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)"); + goto fail; + } + + gdpy->native->screen->flush_frontbuffer = egl_g3d_flush_frontbuffer; + gdpy->native->screen->update_buffer = egl_g3d_update_buffer; + + dpy->ClientAPIsMask = gdrv->api_mask; + + if (egl_g3d_add_configs(drv, dpy, 1) == 1) { + _eglError(EGL_NOT_INITIALIZED, "eglInitialize(unable to add configs)"); + goto fail; + } + +#ifdef EGL_MESA_screen_surface + /* enable MESA_screen_surface */ + if (gdpy->native->modeset) { + dpy->Extensions.MESA_screen_surface = EGL_TRUE; + egl_g3d_add_screens(drv, dpy); + } +#endif + + *major = 1; + *minor = 4; + + return EGL_TRUE; + +fail: + if (gdpy) + egl_g3d_terminate(drv, dpy); + return EGL_FALSE; +} + +static _EGLContext * +egl_g3d_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, + _EGLContext *share, const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_context *gshare = egl_g3d_context(share); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_context *gctx; + const __GLcontextModes *mode; + + gctx = CALLOC_STRUCT(egl_g3d_context); + if (!gctx) { + _eglError(EGL_BAD_ALLOC, "eglCreateContext"); + return NULL; + } + + if (!_eglInitContext(drv, &gctx->base, conf, attribs)) { + free(gctx); + return NULL; + } + + gctx->stapi = egl_g3d_choose_st(drv, &gctx->base); + if (!gctx->stapi) { + free(gctx); + return NULL; + } + + mode = &gconf->native->mode; + gctx->pipe = + gdpy->native->create_context(gdpy->native, (void *) &gctx->base); + if (!gctx->pipe) { + free(gctx); + return NULL; + } + + gctx->st_ctx = gctx->stapi->st_create_context(gctx->pipe, mode, + (gshare) ? gshare->st_ctx : NULL); + if (!gctx->st_ctx) { + gctx->pipe->destroy(gctx->pipe); + free(gctx); + return NULL; + } + + return &gctx->base; +} + +static EGLBoolean +egl_g3d_destroy_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + + if (_eglIsContextBound(&gctx->base)) + return EGL_TRUE; + + egl_g3d_realloc_context(dpy, &gctx->base); + + /* it will destroy pipe context */ + gctx->stapi->st_destroy_context(gctx->st_ctx); + + free(gctx); + + return EGL_TRUE; +} + +static EGLBoolean +init_surface_geometry(_EGLSurface *surf) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + + return gsurf->native->validate(gsurf->native, NULL, 0, + &gsurf->sequence_number, NULL, + &gsurf->base.Width, &gsurf->base.Height); +} + +static _EGLSurface * +egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, EGLNativeWindowType win, + const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_surface *gsurf; + + gsurf = CALLOC_STRUCT(egl_g3d_surface); + if (!gsurf) { + _eglError(EGL_BAD_ALLOC, "eglCreateWindowSurface"); + return NULL; + } + + if (!_eglInitSurface(drv, &gsurf->base, EGL_WINDOW_BIT, conf, attribs)) { + free(gsurf); + return NULL; + } + + gsurf->native = + gdpy->native->create_window_surface(gdpy->native, win, gconf->native); + if (!gsurf->native) { + free(gsurf); + return NULL; + } + + if (!init_surface_geometry(&gsurf->base)) { + gsurf->native->destroy(gsurf->native); + free(gsurf); + return NULL; + } + + gsurf->render_att = (gsurf->base.RenderBuffer == EGL_SINGLE_BUFFER || + !gconf->native->mode.doubleBufferMode) ? + NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; + + return &gsurf->base; +} + +static _EGLSurface * +egl_g3d_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, EGLNativePixmapType pix, + const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_surface *gsurf; + + gsurf = CALLOC_STRUCT(egl_g3d_surface); + if (!gsurf) { + _eglError(EGL_BAD_ALLOC, "eglCreatePixmapSurface"); + return NULL; + } + + if (!_eglInitSurface(drv, &gsurf->base, EGL_PIXMAP_BIT, conf, attribs)) { + free(gsurf); + return NULL; + } + + gsurf->native = + gdpy->native->create_pixmap_surface(gdpy->native, pix, gconf->native); + if (!gsurf->native) { + free(gsurf); + return NULL; + } + + if (!init_surface_geometry(&gsurf->base)) { + gsurf->native->destroy(gsurf->native); + free(gsurf); + return NULL; + } + + gsurf->render_att = NATIVE_ATTACHMENT_FRONT_LEFT; + + return &gsurf->base; +} + +static _EGLSurface * +egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_surface *gsurf; + + gsurf = CALLOC_STRUCT(egl_g3d_surface); + if (!gsurf) { + _eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface"); + return NULL; + } + + if (!_eglInitSurface(drv, &gsurf->base, EGL_PBUFFER_BIT, conf, attribs)) { + free(gsurf); + return NULL; + } + + gsurf->native = + gdpy->native->create_pbuffer_surface(gdpy->native, gconf->native, + gsurf->base.Width, gsurf->base.Height); + if (!gsurf->native) { + free(gsurf); + return NULL; + } + + if (!init_surface_geometry(&gsurf->base)) { + gsurf->native->destroy(gsurf->native); + free(gsurf); + return NULL; + } + + gsurf->render_att = (!gconf->native->mode.doubleBufferMode) ? + NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; + + return &gsurf->base; +} + +static EGLBoolean +egl_g3d_destroy_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + + if (_eglIsSurfaceBound(&gsurf->base)) + return EGL_TRUE; + + pipe_surface_reference(&gsurf->render_surface, NULL); + gsurf->native->destroy(gsurf->native); + free(gsurf); + return EGL_TRUE; +} + +static EGLBoolean +egl_g3d_make_current(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLSurface *draw, _EGLSurface *read, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + struct egl_g3d_context *old_gctx; + EGLint api; + EGLBoolean ok = EGL_TRUE; + + /* find the old context */ + api = (gctx) ? gctx->base.ClientAPI : eglQueryAPI(); + old_gctx = egl_g3d_get_current_context(api); + if (old_gctx && !_eglIsContextLinked(&old_gctx->base)) + old_gctx = NULL; + + if (!_eglMakeCurrent(drv, dpy, draw, read, ctx)) + return EGL_FALSE; + + if (old_gctx) { + /* flush old context */ + old_gctx->stapi->st_flush(old_gctx->st_ctx, + PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); + + /* + * The old context is no longer current, and egl_g3d_realloc_context() + * should be called to destroy the framebuffers. However, it is possible + * that it will be made current again with the same draw/read surfaces. + * It might be better to keep it around. + */ + } + + if (gctx) { + struct egl_g3d_surface *gdraw = egl_g3d_surface(draw); + + ok = egl_g3d_realloc_context(dpy, &gctx->base); + if (ok) { + ok = gctx->stapi->st_make_current(gctx->st_ctx, + gctx->draw.st_fb, gctx->read.st_fb); + if (ok) { + egl_g3d_validate_context(dpy, &gctx->base); + if (gdraw->base.Type == EGL_WINDOW_BIT) { + gctx->base.WindowRenderBuffer = + (gdraw->render_att == NATIVE_ATTACHMENT_FRONT_LEFT) ? + EGL_SINGLE_BUFFER : EGL_BACK_BUFFER; + } + } + } + } + else if (old_gctx) { + ok = old_gctx->stapi->st_make_current(NULL, NULL, NULL); + old_gctx->base.WindowRenderBuffer = EGL_NONE; + } + + return ok; +} + +static EGLBoolean +egl_g3d_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + _EGLContext *ctx = _eglGetCurrentContext(); + struct egl_g3d_context *gctx = NULL; + + /* no-op for pixmap or pbuffer surface */ + if (gsurf->base.Type == EGL_PIXMAP_BIT || + gsurf->base.Type == EGL_PBUFFER_BIT) + return EGL_TRUE; + + /* or when the surface is single-buffered */ + if (gsurf->render_att == NATIVE_ATTACHMENT_FRONT_LEFT) + return EGL_TRUE; + + if (ctx && ctx->DrawSurface == surf) + gctx = egl_g3d_context(ctx); + + /* flush if the surface is current */ + if (gctx) + gctx->stapi->st_notify_swapbuffers(gctx->draw.st_fb); + + /* + * We drew on the back buffer, unless there was no back buffer. + * In that case, we drew on the front buffer. Either case, we call + * swap_buffers. + */ + if (!gsurf->native->swap_buffers(gsurf->native)) + return EGL_FALSE; + + if (gctx) { + struct egl_g3d_config *gconf = egl_g3d_config(gsurf->base.Config); + + /* force validation if the swap method is not copy */ + if (gconf->native->mode.swapMethod != GLX_SWAP_COPY_OML) { + gctx->force_validate = EGL_TRUE; + egl_g3d_validate_context(dpy, &gctx->base); + } + } + + return EGL_TRUE; +} + +static EGLBoolean +egl_g3d_wait_client(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) +{ + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + gctx->stapi->st_finish(gctx->st_ctx); + return EGL_TRUE; +} + +static EGLBoolean +egl_g3d_wait_native(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine) +{ + _EGLSurface *surf = _eglGetCurrentSurface(EGL_DRAW); + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + + if (engine != EGL_CORE_NATIVE_ENGINE) + return _eglError(EGL_BAD_PARAMETER, "eglWaitNative"); + + if (gsurf) + gsurf->native->wait(gsurf->native); + + return EGL_TRUE; +} + +static _EGLProc +egl_g3d_get_proc_address(_EGLDriver *drv, const char *procname) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + _EGLProc proc; + EGLint i; + + for (i = 0; i < NUM_EGL_G3D_STS; i++) { + const struct egl_g3d_st *stapi = gdrv->stapis[i]; + if (stapi) { + proc = (_EGLProc) stapi->st_get_proc_address(procname); + if (proc) + return proc; + } + } + + return (_EGLProc) NULL; +} + +static EGLBoolean +egl_g3d_bind_tex_image(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLSurface *surf, EGLint buffer) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + struct egl_g3d_context *gctx; + enum pipe_format target_format; + int target; + + if (!gsurf || gsurf->base.Type != EGL_PBUFFER_BIT) + return _eglError(EGL_BAD_SURFACE, "eglBindTexImage"); + if (buffer != EGL_BACK_BUFFER) + return _eglError(EGL_BAD_PARAMETER, "eglBindTexImage"); + if (gsurf->base.BoundToTexture) + return _eglError(EGL_BAD_ACCESS, "eglBindTexImage"); + + switch (gsurf->base.TextureFormat) { + case EGL_TEXTURE_RGB: + target_format = PIPE_FORMAT_R8G8B8_UNORM; + break; + case EGL_TEXTURE_RGBA: + target_format = PIPE_FORMAT_A8R8G8B8_UNORM; + break; + default: + return _eglError(EGL_BAD_MATCH, "eglBindTexImage"); + } + + switch (gsurf->base.TextureTarget) { + case EGL_TEXTURE_2D: + target = ST_TEXTURE_2D; + break; + default: + return _eglError(EGL_BAD_MATCH, "eglBindTexImage"); + } + + /* flush properly if the surface is bound */ + if (gsurf->base.Binding) { + gctx = egl_g3d_context(gsurf->base.Binding); + gctx->stapi->st_flush(gctx->st_ctx, + PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); + } + + /* XXX change to EGL_OPENGL_ES_API once OpenGL ES is merged */ + gctx = egl_g3d_get_current_context(EGL_OPENGL_API); + if (gctx) { + if (!gsurf->render_surface) + return EGL_FALSE; + + gctx->stapi->st_bind_texture_surface(gsurf->render_surface, + target, gsurf->base.MipmapLevel, target_format); + gsurf->base.BoundToTexture = EGL_TRUE; + } + + return EGL_TRUE; +} + +static EGLBoolean +egl_g3d_release_tex_image(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLSurface *surf, EGLint buffer) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + + if (!gsurf || gsurf->base.Type != EGL_PBUFFER_BIT || + !gsurf->base.BoundToTexture) + return _eglError(EGL_BAD_SURFACE, "eglReleaseTexImage"); + if (buffer != EGL_BACK_BUFFER) + return _eglError(EGL_BAD_PARAMETER, "eglReleaseTexImage"); + + if (gsurf->render_surface) { + _EGLThreadInfo *t = _eglGetCurrentThread(); + /* XXX change to EGL_OPENGL_ES_API once OpenGL ES is merged */ + struct egl_g3d_context *gctx = egl_g3d_context( + t->CurrentContexts[_eglConvertApiToIndex(EGL_OPENGL_API)]); + + /* what if the context the surface binds to is no longer current? */ + if (gctx) + gctx->stapi->st_unbind_texture_surface(gsurf->render_surface, + ST_TEXTURE_2D, gsurf->base.MipmapLevel); + } + + gsurf->base.BoundToTexture = EGL_FALSE; + + return EGL_TRUE; +} + +#ifdef EGL_MESA_screen_surface + +static _EGLSurface * +egl_g3d_create_screen_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLConfig *conf, const EGLint *attribs) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf = egl_g3d_config(conf); + struct egl_g3d_surface *gsurf; + + gsurf = CALLOC_STRUCT(egl_g3d_surface); + if (!gsurf) { + _eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface"); + return NULL; + } + + if (!_eglInitSurface(drv, &gsurf->base, + EGL_SCREEN_BIT_MESA, conf, attribs)) { + free(gsurf); + return NULL; + } + + gsurf->native = + gdpy->native->modeset->create_scanout_surface(gdpy->native, + gconf->native, gsurf->base.Width, gsurf->base.Height); + if (!gsurf->native) { + free(gsurf); + return NULL; + } + + gsurf->render_att = (!gconf->native->mode.doubleBufferMode) ? + NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; + + return &gsurf->base; +} + +static EGLBoolean +egl_g3d_show_screen_surface(_EGLDriver *drv, _EGLDisplay *dpy, + _EGLScreen *scr, _EGLSurface *surf, + _EGLMode *mode) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_screen *gscr = egl_g3d_screen(scr); + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + struct native_surface *nsurf; + const struct native_mode *nmode; + EGLBoolean changed; + + if (gsurf) { + EGLint idx; + + if (!mode) + return _eglError(EGL_BAD_MATCH, "eglShowSurfaceMESA"); + if (gsurf->base.Type != EGL_SCREEN_BIT_MESA) + return _eglError(EGL_BAD_SURFACE, "eglShowScreenSurfaceMESA"); + if (gsurf->base.Width < mode->Width || gsurf->base.Height < mode->Height) + return _eglError(EGL_BAD_MATCH, + "eglShowSurfaceMESA(surface smaller than mode size)"); + + /* find the index of the mode */ + for (idx = 0; idx < gscr->base.NumModes; idx++) + if (mode == &gscr->base.Modes[idx]) + break; + if (idx >= gscr->base.NumModes) { + return _eglError(EGL_BAD_MODE_MESA, + "eglShowSurfaceMESA(unknown mode)"); + } + + nsurf = gsurf->native; + nmode = gscr->native_modes[idx]; + } + else { + if (mode) + return _eglError(EGL_BAD_MATCH, "eglShowSurfaceMESA"); + + /* disable the screen */ + nsurf = NULL; + nmode = NULL; + } + + /* TODO surface panning by CRTC choosing */ + changed = gdpy->native->modeset->program(gdpy->native, 0, nsurf, + gscr->base.OriginX, gscr->base.OriginY, &gscr->native, 1, nmode); + if (changed) { + gscr->base.CurrentSurface = &gsurf->base; + gscr->base.CurrentMode = mode; + } + + return changed; +} + +#endif /* EGL_MESA_screen_surface */ + +static void +egl_g3d_unload(_EGLDriver *drv) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + free(gdrv); +} + +_EGLDriver * +_eglMain(const char *args) +{ + static char driver_name[64]; + struct egl_g3d_driver *gdrv; + EGLint i; + + snprintf(driver_name, sizeof(driver_name), + "Gallium/%s", native_get_name()); + + gdrv = CALLOC_STRUCT(egl_g3d_driver); + if (!gdrv) + return NULL; + + _eglInitDriverFallbacks(&gdrv->base); + + gdrv->base.API.Initialize = egl_g3d_initialize; + gdrv->base.API.Terminate = egl_g3d_terminate; + gdrv->base.API.CreateContext = egl_g3d_create_context; + gdrv->base.API.DestroyContext = egl_g3d_destroy_context; + gdrv->base.API.CreateWindowSurface = egl_g3d_create_window_surface; + gdrv->base.API.CreatePixmapSurface = egl_g3d_create_pixmap_surface; + gdrv->base.API.CreatePbufferSurface = egl_g3d_create_pbuffer_surface; + gdrv->base.API.DestroySurface = egl_g3d_destroy_surface; + gdrv->base.API.MakeCurrent = egl_g3d_make_current; + gdrv->base.API.SwapBuffers = egl_g3d_swap_buffers; + gdrv->base.API.WaitClient = egl_g3d_wait_client; + gdrv->base.API.WaitNative = egl_g3d_wait_native; + gdrv->base.API.GetProcAddress = egl_g3d_get_proc_address; + + gdrv->base.API.BindTexImage = egl_g3d_bind_tex_image; + gdrv->base.API.ReleaseTexImage = egl_g3d_release_tex_image; + +#ifdef EGL_MESA_screen_surface + gdrv->base.API.CreateScreenSurfaceMESA = egl_g3d_create_screen_surface; + gdrv->base.API.ShowScreenSurfaceMESA = egl_g3d_show_screen_surface; +#endif + + gdrv->base.Name = driver_name; + gdrv->base.Unload = egl_g3d_unload; + + for (i = 0; i < NUM_EGL_G3D_STS; i++) { + gdrv->stapis[i] = egl_g3d_get_st(i); + if (gdrv->stapis[i]) + gdrv->api_mask |= gdrv->stapis[i]->api_bit; + } + + if (gdrv->api_mask) + _eglLog(_EGL_DEBUG, "Driver API mask: 0x%x", gdrv->api_mask); + else + _eglLog(_EGL_WARNING, "No supported client API"); + + return &gdrv->base; +} diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h new file mode 100644 index 00000000000..4c8b8dfe9e0 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h @@ -0,0 +1,127 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _EGL_G3D_H_ +#define _EGL_G3D_H_ + +#include "pipe/p_compiler.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_format.h" +#include "egldriver.h" +#include "egldisplay.h" +#include "eglcontext.h" +#include "eglsurface.h" +#include "eglconfig.h" +#include "eglscreen.h" +#include "eglmode.h" + +#include "native.h" +#include "egl_st.h" + +struct egl_g3d_driver { + _EGLDriver base; + const struct egl_g3d_st *stapis[NUM_EGL_G3D_STS]; + EGLint api_mask; +}; + +struct egl_g3d_display { + struct native_display *native; +}; + +struct egl_g3d_buffer { + struct st_framebuffer *st_fb; + EGLint num_atts; + enum native_attachment native_atts[NUM_NATIVE_ATTACHMENTS]; + uint st_atts[NUM_NATIVE_ATTACHMENTS]; +}; + +struct egl_g3d_context { + _EGLContext base; + + const struct egl_g3d_st *stapi; + struct pipe_context *pipe; + + struct st_context *st_ctx; + EGLBoolean force_validate; + struct egl_g3d_buffer draw, read; +}; + +struct egl_g3d_surface { + _EGLSurface base; + struct native_surface *native; + enum native_attachment render_att; + struct pipe_surface *render_surface; + unsigned int sequence_number; +}; + +struct egl_g3d_config { + _EGLConfig base; + const struct native_config *native; +}; + +struct egl_g3d_screen { + _EGLScreen base; + const struct native_connector *native; + const struct native_mode **native_modes; +}; + +static INLINE struct egl_g3d_driver * +egl_g3d_driver(_EGLDriver *drv) +{ + return (struct egl_g3d_driver *) drv; +} + +static INLINE struct egl_g3d_display * +egl_g3d_display(_EGLDisplay *dpy) +{ + /* note that it is not direct casting */ + return (struct egl_g3d_display *) dpy->DriverData; +} + +static INLINE struct egl_g3d_context * +egl_g3d_context(_EGLContext *ctx) +{ + return (struct egl_g3d_context *) ctx; +} + +static INLINE struct egl_g3d_surface * +egl_g3d_surface(_EGLSurface *surf) +{ + return (struct egl_g3d_surface *) surf; +} + +static INLINE struct egl_g3d_config * +egl_g3d_config(_EGLConfig *conf) +{ + return (struct egl_g3d_config *) conf; +} + +static INLINE struct egl_g3d_screen * +egl_g3d_screen(_EGLScreen *scr) +{ + return (struct egl_g3d_screen *) scr; +} + +#endif /* _EGL_G3D_H_ */ diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_st.c b/src/gallium/state_trackers/egl_g3d/common/egl_st.c new file mode 100644 index 00000000000..a88ff911cd5 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/common/egl_st.c @@ -0,0 +1,131 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <dlfcn.h> +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "egllog.h" +#include "EGL/egl.h" /* for EGL_api_BIT */ + +#include "egl_st.h" + +#ifndef HAVE_DLADDR +#define HAVE_DLADDR 1 +#endif + +#if HAVE_DLADDR + +static const char * +egl_g3d_st_names[] = { +#define ST_PUBLIC(name, ...) #name, +#include "st_public_tmp.h" + NULL +}; + +static boolean +egl_g3d_fill_st(struct egl_g3d_st *stapi, void *sym) +{ + st_proc *procs = (st_proc *) stapi; + void *handle; + Dl_info info; + const char **name; + + if (!dladdr(sym, &info)) + return FALSE; + handle = dlopen(info.dli_fname, RTLD_LAZY | RTLD_LOCAL | RTLD_NODELETE); + if (!handle) + return FALSE; + + for (name = egl_g3d_st_names; *name; name++) { + st_proc proc = (st_proc) dlsym(handle, *name); + if (!proc) { + _eglLog(_EGL_WARNING, "%s is missing in %s", *name, info.dli_fname); + memset(stapi, 0, sizeof(*stapi)); + dlclose(handle); + return FALSE; + } + *procs++ = proc; + } + + dlclose(handle); + return TRUE; +} + +#else /* HAVE_DLADDR */ + +static boolean +egl_g3d_fill_st(struct egl_g3d_st *stapi, void *sym) +{ +#define ST_PUBLIC(name, ...) stapi->name = name; +#include "st_public_tmp.h" + return TRUE; +} + +#endif /* HAVE_DLADDR */ + +static boolean +egl_g3d_init_st(struct egl_g3d_st *stapi, const char *api) +{ + void *handle, *sym; + boolean res = FALSE; + + /* already initialized */ + if (stapi->st_notify_swapbuffers != NULL) + return TRUE; + + handle = dlopen(NULL, RTLD_LAZY | RTLD_LOCAL); + if (!handle) + return FALSE; + + sym = dlsym(handle, api); + if (sym && egl_g3d_fill_st(stapi, sym)) + res = TRUE; + + dlclose(handle); + return res; +} + +static struct { + const char *symbol; + EGLint api_bit; +} egl_g3d_st_info[NUM_EGL_G3D_STS] = { + { "st_api_OpenGL_ES1", EGL_OPENGL_ES_BIT }, + { "st_api_OpenVG", EGL_OPENVG_BIT }, + { "st_api_OpenGL_ES2", EGL_OPENGL_ES2_BIT }, + { "st_api_OpenGL", EGL_OPENGL_BIT }, +}; + +const struct egl_g3d_st * +egl_g3d_get_st(enum egl_g3d_st_api api) +{ + static struct egl_g3d_st all_trackers[NUM_EGL_G3D_STS]; + + if (egl_g3d_init_st(&all_trackers[api], egl_g3d_st_info[api].symbol)) { + all_trackers[api].api_bit = egl_g3d_st_info[api].api_bit; + return &all_trackers[api]; + } + else { + return NULL; + } +} diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_st.h b/src/gallium/state_trackers/egl_g3d/common/egl_st.h new file mode 100644 index 00000000000..8fb464bd3d7 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/common/egl_st.h @@ -0,0 +1,73 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _EGL_ST_H_ +#define _EGL_ST_H_ + +#include "GL/gl.h" /* for GL types */ +#include "GL/internal/glcore.h" /* for __GLcontextModes */ + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "pipe/p_context.h" + +/* avoid calling st functions directly */ +#if 1 + +#define ST_SURFACE_FRONT_LEFT 0 +#define ST_SURFACE_BACK_LEFT 1 +#define ST_SURFACE_FRONT_RIGHT 2 +#define ST_SURFACE_BACK_RIGHT 3 + +#define ST_TEXTURE_2D 0x2 + +struct st_context; +struct st_framebuffer; +typedef void (*st_proc)(); + +#else +#include "state_tracker/st_public.h" +#endif + +/* remember to update egl_g3d_get_st() when update the enums */ +enum egl_g3d_st_api { + EGL_G3D_ST_OPENGL_ES = 0, + EGL_G3D_ST_OPENVG, + EGL_G3D_ST_OPENGL_ES2, + EGL_G3D_ST_OPENGL, + + NUM_EGL_G3D_STS +}; + +struct egl_g3d_st { +#define ST_PUBLIC(name, ret, ...) ret (*name)(__VA_ARGS__); +#include "st_public_tmp.h" + /* fields must be added here */ + EGLint api_bit; +}; + +const struct egl_g3d_st * +egl_g3d_get_st(enum egl_g3d_st_api api); + +#endif /* _EGL_ST_H_ */ diff --git a/src/gallium/state_trackers/egl_g3d/common/native.h b/src/gallium/state_trackers/egl_g3d/common/native.h new file mode 100644 index 00000000000..76f0e0c78ac --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/common/native.h @@ -0,0 +1,220 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NATIVE_H_ +#define _NATIVE_H_ + +#include "EGL/egl.h" /* for EGL native types */ +#include "GL/gl.h" /* for GL types needed by __GLcontextModes */ +#include "GL/internal/glcore.h" /* for __GLcontextModes */ + +#include "pipe/p_compiler.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +/** + * Only color buffers are listed. The others are allocated privately through, + * for example, st_renderbuffer_alloc_storage(). + */ +enum native_attachment { + NATIVE_ATTACHMENT_FRONT_LEFT, + NATIVE_ATTACHMENT_BACK_LEFT, + NATIVE_ATTACHMENT_FRONT_RIGHT, + NATIVE_ATTACHMENT_BACK_RIGHT, + + NUM_NATIVE_ATTACHMENTS +}; + +struct native_surface { + void (*destroy)(struct native_surface *nsurf); + + /** + * Swap the front and back buffers so that the back buffer is visible. It + * is no-op if the surface is single-buffered. The contents of the back + * buffer after swapping may or may not be preserved. + */ + boolean (*swap_buffers)(struct native_surface *nsurf); + + /** + * Make the front buffer visible. In some native displays, changes to the + * front buffer might not be visible immediately and require manual flush. + */ + boolean (*flush_frontbuffer)(struct native_surface *nsurf); + + /** + * Validate the buffers of the surface. The returned textures are owned by + * the caller. A sequence number is also returned. The caller can use it + * to check if anything has changed since the last call. Any of the pointers + * may be NULL and it indicates the caller has no interest in those values. + * + * If this function is called multiple times with different attachments, + * those not listed in the latest call might be destroyed. This behavior + * might change in the future. + */ + boolean (*validate)(struct native_surface *nsurf, + const enum native_attachment *natts, + unsigned num_natts, + unsigned int *seq_num, struct pipe_texture **textures, + int *width, int *height); + + /** + * Wait until all native commands affecting the surface has been executed. + */ + void (*wait)(struct native_surface *nsurf); +}; + +struct native_config { + /* __GLcontextModes should go away some day */ + __GLcontextModes mode; + enum pipe_format color_format; + enum pipe_format depth_format; + enum pipe_format stencil_format; + + /* treat it as an additional flag to mode.drawableType */ + boolean scanout_bit; +}; + +struct native_connector { + int dummy; +}; + +struct native_mode { + const char *desc; + int width, height; + int refresh_rate; +}; + +struct native_display_modeset; + +/** + * A pipe winsys abstracts the OS. A pipe screen abstracts the graphcis + * hardware. A native display consists of a pipe winsys, a pipe screen, and + * the native display server. + */ +struct native_display { + /** + * The pipe screen of the native display. + * + * Note that the "flush_frontbuffer" and "update_buffer" callbacks will be + * overridden. + */ + struct pipe_screen *screen; + + void (*destroy)(struct native_display *ndpy); + + /** + * Get the supported configs. The configs are owned by the display, but + * the returned array should be free()ed. + * + * The configs will be converted to EGL config by + * _eglConfigFromContextModesRec and validated by _eglValidateConfig. + * Those failing to pass the test will be skipped. + */ + const struct native_config **(*get_configs)(struct native_display *ndpy, + int *num_configs); + + /** + * Create a pipe context. + */ + struct pipe_context *(*create_context)(struct native_display *ndpy, + void *context_private); + + /** + * Create a window surface. Required unless no config has GLX_WINDOW_BIT + * set. + */ + struct native_surface *(*create_window_surface)(struct native_display *ndpy, + EGLNativeWindowType win, + const struct native_config *nconf); + + /** + * Create a pixmap surface. Required unless no config has GLX_PIXMAP_BIT + * set. + */ + struct native_surface *(*create_pixmap_surface)(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf); + + /** + * Create a pbuffer surface. Required unless no config has GLX_PBUFFER_BIT + * set. + */ + struct native_surface *(*create_pbuffer_surface)(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height); + + const struct native_display_modeset *modeset; +}; + +/** + * Mode setting interface of the native display. It exposes the mode setting + * capabilities of the underlying graphics hardware. + */ +struct native_display_modeset { + /** + * Get the available physical connectors and the number of CRTCs. + */ + const struct native_connector **(*get_connectors)(struct native_display *ndpy, + int *num_connectors, + int *num_crtcs); + + /** + * Get the current supported modes of a connector. The returned modes may + * change every time this function is called and those from previous calls + * might become invalid. + */ + const struct native_mode **(*get_modes)(struct native_display *ndpy, + const struct native_connector *nconn, + int *num_modes); + + /** + * Create a scan-out surface. Required unless no config has + * GLX_SCREEN_BIT_MESA set. + */ + struct native_surface *(*create_scanout_surface)(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height); + + /** + * Program the CRTC to output the surface to the given connectors with the + * given mode. When surface is not given, the CRTC is disabled. + * + * This interface does not export a way to query capabilities of the CRTCs. + * The native display usually needs to dynamically map the index to a CRTC + * that supports the given connectors. + */ + boolean (*program)(struct native_display *ndpy, int crtc_idx, + struct native_surface *nsurf, uint x, uint y, + const struct native_connector **nconns, int num_nconns, + const struct native_mode *nmode); +}; + +const char * +native_get_name(void); + +struct native_display * +native_create_display(EGLNativeDisplayType dpy); + +#endif /* _NATIVE_H_ */ diff --git a/src/gallium/state_trackers/egl_g3d/common/st_public_tmp.h b/src/gallium/state_trackers/egl_g3d/common/st_public_tmp.h new file mode 100644 index 00000000000..507a0ec4027 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/common/st_public_tmp.h @@ -0,0 +1,20 @@ +ST_PUBLIC(st_create_context, struct st_context *, struct pipe_context *pipe, const __GLcontextModes *visual, struct st_context *share) +ST_PUBLIC(st_destroy_context, void, struct st_context *st) +ST_PUBLIC(st_copy_context_state, void, struct st_context *dst, struct st_context *src, uint mask) +ST_PUBLIC(st_create_framebuffer, struct st_framebuffer *, const __GLcontextModes *visual, enum pipe_format colorFormat, enum pipe_format depthFormat, enum pipe_format stencilFormat, uint width, uint height, void *privateData) +ST_PUBLIC(st_resize_framebuffer, void, struct st_framebuffer *stfb, uint width, uint height) +ST_PUBLIC(st_set_framebuffer_surface, void, struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface *surf) +ST_PUBLIC(st_get_framebuffer_dimensions, void, struct st_framebuffer *stfb, uint *width, uint *height) +ST_PUBLIC(st_get_framebuffer_surface, int, struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface **surface) +ST_PUBLIC(st_get_framebuffer_texture, int, struct st_framebuffer *stfb, uint surfIndex, struct pipe_texture **texture) +ST_PUBLIC(st_framebuffer_private, void *, struct st_framebuffer *stfb) +ST_PUBLIC(st_unreference_framebuffer, void, struct st_framebuffer *stfb) +ST_PUBLIC(st_make_current, GLboolean, struct st_context *st, struct st_framebuffer *draw, struct st_framebuffer *read) +ST_PUBLIC(st_get_current, struct st_context *, void) +ST_PUBLIC(st_flush, void, struct st_context *st, uint pipeFlushFlags, struct pipe_fence_handle **fence) +ST_PUBLIC(st_finish, void, struct st_context *st) +ST_PUBLIC(st_notify_swapbuffers, void, struct st_framebuffer *stfb) +ST_PUBLIC(st_bind_texture_surface, int, struct pipe_surface *ps, int target, int level, enum pipe_format format) +ST_PUBLIC(st_unbind_texture_surface, int, struct pipe_surface *ps, int target, int level) +ST_PUBLIC(st_get_proc_address, st_proc, const char *procname) +#undef ST_PUBLIC diff --git a/src/gallium/state_trackers/egl_g3d/kms/native_kms.c b/src/gallium/state_trackers/egl_g3d/kms/native_kms.c new file mode 100644 index 00000000000..65829fc7b3d --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/kms/native_kms.c @@ -0,0 +1,855 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <string.h> + +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "egllog.h" + +#include "native_kms.h" + +static boolean +kms_surface_validate(struct native_surface *nsurf, + const enum native_attachment *natts, + unsigned num_natts, + unsigned int *seq_num, struct pipe_texture **textures, + int *width, int *height) +{ + struct kms_surface *ksurf = kms_surface(nsurf); + struct kms_display *kdpy = ksurf->kdpy; + struct pipe_screen *screen = kdpy->base.screen; + struct pipe_texture templ, *ptex; + int i; + + if (num_natts) { + if (textures) + memset(textures, 0, sizeof(*textures) * num_natts); + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.last_level = 0; + templ.width0 = ksurf->width; + templ.height0 = ksurf->height; + templ.depth0 = 1; + templ.format = ksurf->color_format; + templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; + if (ksurf->type == KMS_SURFACE_TYPE_SCANOUT) + templ.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY; + } + + /* create textures */ + for (i = 0; i < num_natts; i++) { + enum native_attachment natt = natts[i]; + + ptex = ksurf->textures[natt]; + if (!ptex) { + ptex = screen->texture_create(screen, &templ); + ksurf->textures[natt] = ptex; + } + + if (textures) + pipe_texture_reference(&textures[i], ptex); + } + + if (seq_num) + *seq_num = ksurf->sequence_number; + if (width) + *width = ksurf->width; + if (height) + *height = ksurf->height; + + return TRUE; +} + +/** + * Add textures as DRM framebuffers. + */ +static boolean +kms_surface_init_framebuffers(struct native_surface *nsurf, boolean need_back) +{ + struct kms_surface *ksurf = kms_surface(nsurf); + struct kms_display *kdpy = ksurf->kdpy; + int num_framebuffers = (need_back) ? 2 : 1; + int i, err; + + for (i = 0; i < num_framebuffers; i++) { + struct kms_framebuffer *fb; + enum native_attachment natt; + unsigned int handle, stride; + uint block_bits; + + if (i == 0) { + fb = &ksurf->front_fb; + natt = NATIVE_ATTACHMENT_FRONT_LEFT; + } + else { + fb = &ksurf->back_fb; + natt = NATIVE_ATTACHMENT_BACK_LEFT; + } + + if (!fb->texture) { + /* make sure the texture has been allocated */ + kms_surface_validate(&ksurf->base, &natt, 1, NULL, NULL, NULL, NULL); + if (!ksurf->textures[natt]) + return FALSE; + + pipe_texture_reference(&fb->texture, ksurf->textures[natt]); + } + + /* already initialized */ + if (fb->buffer_id) + continue; + + /* TODO detect the real value */ + fb->is_passive = TRUE; + + if (!kdpy->api->local_handle_from_texture(kdpy->api, + kdpy->base.screen, fb->texture, &stride, &handle)) + return FALSE; + + block_bits = util_format_get_blocksizebits(ksurf->color_format); + err = drmModeAddFB(kdpy->fd, ksurf->width, ksurf->height, + block_bits, block_bits, stride, handle, &fb->buffer_id); + if (err) { + fb->buffer_id = 0; + return FALSE; + } + } + + return TRUE; +} + +static boolean +kms_surface_flush_frontbuffer(struct native_surface *nsurf) +{ +#ifdef DRM_MODE_FEATURE_DIRTYFB + struct kms_surface *ksurf = kms_surface(nsurf); + struct kms_display *kdpy = ksurf->kdpy; + + /* pbuffer is private */ + if (ksurf->type == KMS_SURFACE_TYPE_PBUFFER) + return TRUE; + + if (ksurf->front_fb.is_passive) + drmModeDirtyFB(kdpy->fd, ksurf->front_fb.buffer_id, NULL, 0); +#endif + + return TRUE; +} + +static boolean +kms_surface_swap_buffers(struct native_surface *nsurf) +{ + struct kms_surface *ksurf = kms_surface(nsurf); + struct kms_crtc *kcrtc = &ksurf->current_crtc; + struct kms_display *kdpy = ksurf->kdpy; + struct kms_framebuffer tmp_fb; + struct pipe_texture *tmp_texture; + int err; + + /* pbuffer is private */ + if (ksurf->type == KMS_SURFACE_TYPE_PBUFFER) + return TRUE; + + if (!ksurf->back_fb.buffer_id) { + if (!kms_surface_init_framebuffers(&ksurf->base, TRUE)) + return FALSE; + } + + if (ksurf->is_shown && kcrtc->crtc) { + err = drmModeSetCrtc(kdpy->fd, kcrtc->crtc->crtc_id, + ksurf->back_fb.buffer_id, kcrtc->crtc->x, kcrtc->crtc->y, + kcrtc->connectors, kcrtc->num_connectors, &kcrtc->crtc->mode); + if (err) + return FALSE; + } + + /* swap the buffers */ + tmp_fb = ksurf->front_fb; + ksurf->front_fb = ksurf->back_fb; + ksurf->back_fb = tmp_fb; + + tmp_texture = ksurf->textures[NATIVE_ATTACHMENT_FRONT_LEFT]; + ksurf->textures[NATIVE_ATTACHMENT_FRONT_LEFT] = + ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT]; + ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT] = tmp_texture; + + /* the front/back textures are swapped */ + ksurf->sequence_number++; + + return TRUE; +} + +static void +kms_surface_wait(struct native_surface *nsurf) +{ + /* no-op */ +} + +static void +kms_surface_destroy(struct native_surface *nsurf) +{ + struct kms_surface *ksurf = kms_surface(nsurf); + int i; + + if (ksurf->current_crtc.crtc) + drmModeFreeCrtc(ksurf->current_crtc.crtc); + + if (ksurf->front_fb.buffer_id) + drmModeRmFB(ksurf->kdpy->fd, ksurf->front_fb.buffer_id); + pipe_texture_reference(&ksurf->front_fb.texture, NULL); + + if (ksurf->back_fb.buffer_id) + drmModeRmFB(ksurf->kdpy->fd, ksurf->back_fb.buffer_id); + pipe_texture_reference(&ksurf->back_fb.texture, NULL); + + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { + struct pipe_texture *ptex = ksurf->textures[i]; + pipe_texture_reference(&ptex, NULL); + } + + free(ksurf); +} + +static struct kms_surface * +kms_display_create_surface(struct native_display *ndpy, + enum kms_surface_type type, + const struct native_config *nconf, + uint width, uint height) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct kms_config *kconf = kms_config(nconf); + struct kms_surface *ksurf; + + ksurf = CALLOC_STRUCT(kms_surface); + if (!ksurf) + return NULL; + + ksurf->kdpy = kdpy; + ksurf->type = type; + ksurf->color_format = kconf->base.color_format; + ksurf->width = width; + ksurf->height = height; + + ksurf->base.destroy = kms_surface_destroy; + ksurf->base.swap_buffers = kms_surface_swap_buffers; + ksurf->base.flush_frontbuffer = kms_surface_flush_frontbuffer; + ksurf->base.validate = kms_surface_validate; + ksurf->base.wait = kms_surface_wait; + + return ksurf; +} + +/** + * Choose a CRTC that supports all given connectors. + */ +static uint32_t +kms_display_choose_crtc(struct native_display *ndpy, + uint32_t *connectors, int num_connectors) +{ + struct kms_display *kdpy = kms_display(ndpy); + int idx; + + for (idx = 0; idx < kdpy->resources->count_crtcs; idx++) { + boolean found_crtc = TRUE; + int i, j; + + for (i = 0; i < num_connectors; i++) { + drmModeConnectorPtr connector; + int encoder_idx = -1; + + connector = drmModeGetConnector(kdpy->fd, connectors[i]); + if (!connector) { + found_crtc = FALSE; + break; + } + + /* find an encoder the CRTC supports */ + for (j = 0; j < connector->count_encoders; j++) { + drmModeEncoderPtr encoder = + drmModeGetEncoder(kdpy->fd, connector->encoders[j]); + if (encoder->possible_crtcs & (1 << idx)) { + encoder_idx = j; + break; + } + drmModeFreeEncoder(encoder); + } + + drmModeFreeConnector(connector); + if (encoder_idx < 0) { + found_crtc = FALSE; + break; + } + } + + if (found_crtc) + break; + } + + if (idx >= kdpy->resources->count_crtcs) { + _eglLog(_EGL_WARNING, + "failed to find a CRTC that supports the given %d connectors", + num_connectors); + return 0; + } + + return kdpy->resources->crtcs[idx]; +} + +/** + * Remember the original CRTC status and set the CRTC + */ +static boolean +kms_display_set_crtc(struct native_display *ndpy, int crtc_idx, + uint32_t buffer_id, uint32_t x, uint32_t y, + uint32_t *connectors, int num_connectors, + drmModeModeInfoPtr mode) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct kms_crtc *kcrtc = &kdpy->saved_crtcs[crtc_idx]; + uint32_t crtc_id; + int err; + + if (kcrtc->crtc) { + crtc_id = kcrtc->crtc->crtc_id; + } + else { + int count = 0, i; + + /* + * Choose the CRTC once. It could be more dynamic, but let's keep it + * simple for now. + */ + crtc_id = kms_display_choose_crtc(&kdpy->base, + connectors, num_connectors); + + /* save the original CRTC status */ + kcrtc->crtc = drmModeGetCrtc(kdpy->fd, crtc_id); + if (!kcrtc->crtc) + return FALSE; + + for (i = 0; i < kdpy->num_connectors; i++) { + struct kms_connector *kconn = &kdpy->connectors[i]; + drmModeConnectorPtr connector = kconn->connector; + drmModeEncoderPtr encoder; + + encoder = drmModeGetEncoder(kdpy->fd, connector->encoder_id); + if (encoder) { + if (encoder->crtc_id == crtc_id) { + kcrtc->connectors[count++] = connector->connector_id; + if (count >= Elements(kcrtc->connectors)) + break; + } + drmModeFreeEncoder(encoder); + } + } + + kcrtc->num_connectors = count; + } + + err = drmModeSetCrtc(kdpy->fd, crtc_id, buffer_id, x, y, + connectors, num_connectors, mode); + if (err) { + drmModeFreeCrtc(kcrtc->crtc); + kcrtc->crtc = NULL; + kcrtc->num_connectors = 0; + + return FALSE; + } + + return TRUE; +} + +static boolean +kms_display_program(struct native_display *ndpy, int crtc_idx, + struct native_surface *nsurf, uint x, uint y, + const struct native_connector **nconns, int num_nconns, + const struct native_mode *nmode) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct kms_surface *ksurf = kms_surface(nsurf); + const struct kms_mode *kmode = kms_mode(nmode); + uint32_t connector_ids[32]; + uint32_t buffer_id; + drmModeModeInfo mode_tmp, *mode; + int i; + + if (num_nconns > Elements(connector_ids)) { + _eglLog(_EGL_WARNING, "too many connectors (%d)", num_nconns); + num_nconns = Elements(connector_ids); + } + + if (ksurf) { + if (!kms_surface_init_framebuffers(&ksurf->base, FALSE)) + return FALSE; + + buffer_id = ksurf->front_fb.buffer_id; + /* the mode argument of drmModeSetCrtc is not constified */ + mode_tmp = kmode->mode; + mode = &mode_tmp; + } + else { + /* disable the CRTC */ + buffer_id = 0; + mode = NULL; + num_nconns = 0; + } + + for (i = 0; i < num_nconns; i++) { + struct kms_connector *kconn = kms_connector(nconns[i]); + connector_ids[i] = kconn->connector->connector_id; + } + + if (!kms_display_set_crtc(&kdpy->base, crtc_idx, buffer_id, x, y, + connector_ids, num_nconns, mode)) { + _eglLog(_EGL_WARNING, "failed to set CRTC %d", crtc_idx); + + return FALSE; + } + + if (kdpy->shown_surfaces[crtc_idx]) + kdpy->shown_surfaces[crtc_idx]->is_shown = FALSE; + kdpy->shown_surfaces[crtc_idx] = ksurf; + + /* remember the settings for buffer swapping */ + if (ksurf) { + uint32_t crtc_id = kdpy->saved_crtcs[crtc_idx].crtc->crtc_id; + struct kms_crtc *kcrtc = &ksurf->current_crtc; + + if (kcrtc->crtc) + drmModeFreeCrtc(kcrtc->crtc); + kcrtc->crtc = drmModeGetCrtc(kdpy->fd, crtc_id); + + assert(num_nconns < Elements(kcrtc->connectors)); + memcpy(kcrtc->connectors, connector_ids, + sizeof(*connector_ids) * num_nconns); + kcrtc->num_connectors = num_nconns; + + ksurf->is_shown = TRUE; + } + + return TRUE; +} + +static const struct native_mode ** +kms_display_get_modes(struct native_display *ndpy, + const struct native_connector *nconn, + int *num_modes) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct kms_connector *kconn = kms_connector(nconn); + const struct native_mode **nmodes_return; + int count, i; + + /* delete old data */ + if (kconn->connector) { + drmModeFreeConnector(kconn->connector); + free(kconn->kms_modes); + + kconn->connector = NULL; + kconn->kms_modes = NULL; + kconn->num_modes = 0; + } + + /* detect again */ + kconn->connector = drmModeGetConnector(kdpy->fd, kconn->connector_id); + if (!kconn->connector) + return NULL; + + count = kconn->connector->count_modes; + kconn->kms_modes = calloc(count, sizeof(*kconn->kms_modes)); + if (!kconn->kms_modes) { + drmModeFreeConnector(kconn->connector); + kconn->connector = NULL; + + return NULL; + } + + for (i = 0; i < count; i++) { + struct kms_mode *kmode = &kconn->kms_modes[i]; + drmModeModeInfoPtr mode = &kconn->connector->modes[i]; + + kmode->mode = *mode; + + kmode->base.desc = kmode->mode.name; + kmode->base.width = kmode->mode.hdisplay; + kmode->base.height = kmode->mode.vdisplay; + kmode->base.refresh_rate = kmode->mode.vrefresh / 1000; + } + + nmodes_return = malloc(count * sizeof(*nmodes_return)); + if (nmodes_return) { + for (i = 0; i < count; i++) + nmodes_return[i] = &kconn->kms_modes[i].base; + if (num_modes) + *num_modes = count; + } + + return nmodes_return; +} + +static const struct native_connector ** +kms_display_get_connectors(struct native_display *ndpy, int *num_connectors, + int *num_crtc) +{ + struct kms_display *kdpy = kms_display(ndpy); + const struct native_connector **connectors; + int i; + + if (!kdpy->connectors) { + kdpy->connectors = + calloc(kdpy->resources->count_connectors, sizeof(*kdpy->connectors)); + if (!kdpy->connectors) + return NULL; + + for (i = 0; i < kdpy->resources->count_connectors; i++) { + struct kms_connector *kconn = &kdpy->connectors[i]; + + kconn->connector_id = kdpy->resources->connectors[i]; + /* kconn->connector is allocated when the modes are asked */ + } + + kdpy->num_connectors = kdpy->resources->count_connectors; + } + + connectors = malloc(kdpy->num_connectors * sizeof(*connectors)); + if (connectors) { + for (i = 0; i < kdpy->num_connectors; i++) + connectors[i] = &kdpy->connectors[i].base; + if (num_connectors) + *num_connectors = kdpy->num_connectors; + } + + if (num_crtc) + *num_crtc = kdpy->resources->count_crtcs; + + return connectors; +} + +static struct native_surface * +kms_display_create_scanout_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct kms_surface *ksurf; + + ksurf = kms_display_create_surface(ndpy, + KMS_SURFACE_TYPE_SCANOUT, nconf, width, height); + return &ksurf->base; +} + +static struct native_surface * +kms_display_create_pbuffer_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct kms_surface *ksurf; + + ksurf = kms_display_create_surface(ndpy, + KMS_SURFACE_TYPE_PBUFFER, nconf, width, height); + return &ksurf->base; +} + +static struct pipe_context * +kms_display_create_context(struct native_display *ndpy, void *context_private) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct pipe_context *pctx; + + pctx = kdpy->api->create_context(kdpy->api, kdpy->base.screen); + if (pctx) + pctx->priv = context_private; + return pctx; +} + +static boolean +kms_display_is_format_supported(struct native_display *ndpy, + enum pipe_format fmt, boolean is_color) +{ + return ndpy->screen->is_format_supported(ndpy->screen, + fmt, PIPE_TEXTURE_2D, + (is_color) ? PIPE_TEXTURE_USAGE_RENDER_TARGET : + PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0); +} + +static const struct native_config ** +kms_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct kms_display *kdpy = kms_display(ndpy); + const struct native_config **configs; + + /* first time */ + if (!kdpy->config) { + struct native_config *nconf; + enum pipe_format format; + + kdpy->config = calloc(1, sizeof(*kdpy->config)); + if (!kdpy->config) + return NULL; + + nconf = &kdpy->config->base; + + /* always double-buffered */ + nconf->mode.doubleBufferMode = TRUE; + + format = PIPE_FORMAT_A8R8G8B8_UNORM; + if (!kms_display_is_format_supported(&kdpy->base, format, TRUE)) { + format = PIPE_FORMAT_B8G8R8A8_UNORM; + if (!kms_display_is_format_supported(&kdpy->base, format, TRUE)) + format = PIPE_FORMAT_NONE; + } + if (format == PIPE_FORMAT_NONE) + return NULL; + + nconf->color_format = format; + nconf->mode.redBits = 8; + nconf->mode.greenBits = 8; + nconf->mode.blueBits = 8; + nconf->mode.alphaBits = 8; + nconf->mode.rgbBits = 32; + + format = PIPE_FORMAT_S8Z24_UNORM; + if (!kms_display_is_format_supported(&kdpy->base, format, FALSE)) { + format = PIPE_FORMAT_Z24S8_UNORM; + if (!kms_display_is_format_supported(&kdpy->base, format, FALSE)) + format = PIPE_FORMAT_NONE; + } + if (format != PIPE_FORMAT_NONE) { + nconf->depth_format = format; + nconf->stencil_format = format; + + nconf->mode.depthBits = 24; + nconf->mode.stencilBits = 8; + nconf->mode.haveDepthBuffer = TRUE; + nconf->mode.haveStencilBuffer = TRUE; + } + + nconf->scanout_bit = TRUE; + nconf->mode.drawableType = GLX_PBUFFER_BIT; + nconf->mode.swapMethod = GLX_SWAP_EXCHANGE_OML; + + nconf->mode.visualID = 0; + nconf->mode.visualType = EGL_NONE; + + nconf->mode.renderType = GLX_RGBA_BIT; + nconf->mode.rgbMode = TRUE; + nconf->mode.xRenderable = FALSE; + } + + configs = malloc(sizeof(*configs)); + if (configs) { + configs[0] = &kdpy->config->base; + if (num_configs) + *num_configs = 1; + } + + return configs; +} + +static void +kms_display_destroy(struct native_display *ndpy) +{ + struct kms_display *kdpy = kms_display(ndpy); + int i; + + if (kdpy->config) + free(kdpy->config); + + if (kdpy->connectors) { + for (i = 0; i < kdpy->num_connectors; i++) { + struct kms_connector *kconn = &kdpy->connectors[i]; + if (kconn->connector) { + drmModeFreeConnector(kconn->connector); + free(kconn->kms_modes); + } + } + free(kdpy->connectors); + } + + if (kdpy->shown_surfaces) + free(kdpy->shown_surfaces); + + if (kdpy->saved_crtcs) { + for (i = 0; i < kdpy->resources->count_crtcs; i++) { + struct kms_crtc *kcrtc = &kdpy->saved_crtcs[i]; + + if (kcrtc->crtc) { + /* restore crtc */ + drmModeSetCrtc(kdpy->fd, kcrtc->crtc->crtc_id, + kcrtc->crtc->buffer_id, kcrtc->crtc->x, kcrtc->crtc->y, + kcrtc->connectors, kcrtc->num_connectors, + &kcrtc->crtc->mode); + + drmModeFreeCrtc(kcrtc->crtc); + } + } + free(kdpy->saved_crtcs); + } + + if (kdpy->resources) + drmModeFreeResources(kdpy->resources); + + if (kdpy->base.screen) + kdpy->base.screen->destroy(kdpy->base.screen); + + if (kdpy->fd >= 0) + drmClose(kdpy->fd); + + if (kdpy->api) + kdpy->api->destroy(kdpy->api); + free(kdpy); +} + +/** + * Initialize KMS and pipe screen. + */ +static boolean +kms_display_init_screen(struct native_display *ndpy) +{ + struct kms_display *kdpy = kms_display(ndpy); + struct drm_create_screen_arg arg; + int fd; + + fd = drmOpen(kdpy->api->name, NULL); + if (fd < 0) { + _eglLog(_EGL_WARNING, "failed to open DRM device"); + return FALSE; + } + +#if 0 + if (drmSetMaster(fd)) { + _eglLog(_EGL_WARNING, "failed to become DRM master"); + return FALSE; + } +#endif + + memset(&arg, 0, sizeof(arg)); + arg.mode = DRM_CREATE_NORMAL; + kdpy->base.screen = kdpy->api->create_screen(kdpy->api, fd, &arg); + if (!kdpy->base.screen) { + _eglLog(_EGL_WARNING, "failed to create DRM screen"); + drmClose(fd); + return FALSE; + } + + kdpy->fd = fd; + + return TRUE; +} + +static struct native_display_modeset kms_display_modeset = { + .get_connectors = kms_display_get_connectors, + .get_modes = kms_display_get_modes, + .create_scanout_surface = kms_display_create_scanout_surface, + .program = kms_display_program +}; + +static struct native_display * +kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api) +{ + struct kms_display *kdpy; + + kdpy = CALLOC_STRUCT(kms_display); + if (!kdpy) + return NULL; + + kdpy->api = api; + if (!kdpy->api) { + _eglLog(_EGL_WARNING, "failed to create DRM API"); + free(kdpy); + return NULL; + } + + kdpy->fd = -1; + if (!kms_display_init_screen(&kdpy->base)) { + kms_display_destroy(&kdpy->base); + return NULL; + } + + /* resources are fixed, unlike crtc, connector, or encoder */ + kdpy->resources = drmModeGetResources(kdpy->fd); + if (!kdpy->resources) { + kms_display_destroy(&kdpy->base); + return NULL; + } + + kdpy->saved_crtcs = + calloc(kdpy->resources->count_crtcs, sizeof(*kdpy->saved_crtcs)); + if (!kdpy->saved_crtcs) { + kms_display_destroy(&kdpy->base); + return NULL; + } + + kdpy->shown_surfaces = + calloc(kdpy->resources->count_crtcs, sizeof(*kdpy->shown_surfaces)); + if (!kdpy->shown_surfaces) { + kms_display_destroy(&kdpy->base); + return NULL; + } + + kdpy->base.destroy = kms_display_destroy; + kdpy->base.get_configs = kms_display_get_configs; + kdpy->base.create_context = kms_display_create_context; + kdpy->base.create_pbuffer_surface = kms_display_create_pbuffer_surface; + + kdpy->base.modeset = &kms_display_modeset; + + return &kdpy->base; +} + +/* the api is destroyed with the native display */ +static struct drm_api *drm_api; + +const char * +native_get_name(void) +{ + static char kms_name[32]; + + if (!drm_api) + drm_api = drm_api_create(); + + if (drm_api) + snprintf(kms_name, sizeof(kms_name), "KMS/%s", drm_api->name); + else + snprintf(kms_name, sizeof(kms_name), "KMS"); + + return kms_name; +} + +struct native_display * +native_create_display(EGLNativeDisplayType dpy) +{ + struct native_display *ndpy = NULL; + + if (!drm_api) + drm_api = drm_api_create(); + + if (drm_api) + ndpy = kms_create_display(dpy, drm_api); + + return ndpy; +} diff --git a/src/gallium/state_trackers/egl_g3d/kms/native_kms.h b/src/gallium/state_trackers/egl_g3d/kms/native_kms.h new file mode 100644 index 00000000000..095186e3cf3 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/kms/native_kms.h @@ -0,0 +1,139 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NATIVE_KMS_H_ +#define _NATIVE_KMS_H_ + +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "pipe/p_compiler.h" +#include "util/u_format.h" +#include "pipe/p_state.h" +#include "state_tracker/drm_api.h" + +#include "common/native.h" + +enum kms_surface_type { + KMS_SURFACE_TYPE_PBUFFER, + KMS_SURFACE_TYPE_SCANOUT +}; + +struct kms_config; +struct kms_connector; +struct kms_mode; + +struct kms_crtc { + drmModeCrtcPtr crtc; + uint32_t connectors[32]; + int num_connectors; +}; + +struct kms_display { + struct native_display base; + + int fd; + struct drm_api *api; + drmModeResPtr resources; + struct kms_config *config; + + struct kms_connector *connectors; + int num_connectors; + + struct kms_surface **shown_surfaces; + /* save the original settings of the CRTCs */ + struct kms_crtc *saved_crtcs; +}; + +struct kms_framebuffer { + struct pipe_texture *texture; + boolean is_passive; + + uint32_t buffer_id; +}; + +struct kms_surface { + struct native_surface base; + enum kms_surface_type type; + enum pipe_format color_format; + struct kms_display *kdpy; + int width, height; + + struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; + unsigned int sequence_number; + struct kms_framebuffer front_fb, back_fb; + + boolean is_shown; + struct kms_crtc current_crtc; +}; + +struct kms_config { + struct native_config base; +}; + +struct kms_connector { + struct native_connector base; + + uint32_t connector_id; + drmModeConnectorPtr connector; + struct kms_mode *kms_modes; + int num_modes; +}; + +struct kms_mode { + struct native_mode base; + drmModeModeInfo mode; +}; + +static INLINE struct kms_display * +kms_display(const struct native_display *ndpy) +{ + return (struct kms_display *) ndpy; +} + +static INLINE struct kms_surface * +kms_surface(const struct native_surface *nsurf) +{ + return (struct kms_surface *) nsurf; +} + +static INLINE struct kms_config * +kms_config(const struct native_config *nconf) +{ + return (struct kms_config *) nconf; +} + +static INLINE struct kms_connector * +kms_connector(const struct native_connector *nconn) +{ + return (struct kms_connector *) nconn; +} + +static INLINE struct kms_mode * +kms_mode(const struct native_mode *nmode) +{ + return (struct kms_mode *) nmode; +} + +#endif /* _NATIVE_KMS_H_ */ diff --git a/src/gallium/state_trackers/egl_g3d/x11/glxinit.c b/src/gallium/state_trackers/egl_g3d/x11/glxinit.c new file mode 100644 index 00000000000..c955a908b93 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/glxinit.c @@ -0,0 +1,573 @@ +/** + * GLX initialization. Code based on glxext.c, glx_query.c, and + * glcontextmodes.c under src/glx/x11/. The major difference is that no DRI + * related code here. + * + */ + +#include <assert.h> +#include <X11/Xlib.h> +#include <X11/Xproto.h> +#include <X11/extensions/Xext.h> +#include <X11/extensions/extutil.h> +#include <sys/time.h> + +#include "x11_screen.h" +#include "glxinit.h" + +typedef struct GLXGenericGetString +{ + CARD8 reqType; + CARD8 glxCode; + CARD16 length B16; + CARD32 for_whom B32; + CARD32 name B32; +} xGLXGenericGetStringReq; + +#define sz_xGLXGenericGetStringReq 12 +#define X_GLXGenericGetString 0 + +/* Extension required boiler plate */ + +static char *__glXExtensionName = GLX_EXTENSION_NAME; +static XExtensionInfo *__glXExtensionInfo = NULL; + +static /* const */ XExtensionHooks __glXExtensionHooks = { NULL }; +static +XEXT_GENERATE_FIND_DISPLAY(__glXFindDisplay, __glXExtensionInfo, + __glXExtensionName, &__glXExtensionHooks, + __GLX_NUMBER_EVENTS, NULL) + +static GLint +_gl_convert_from_x_visual_type(int visualType) +{ +#define NUM_VISUAL_TYPES 6 + static const int glx_visual_types[NUM_VISUAL_TYPES] = { + GLX_STATIC_GRAY, GLX_GRAY_SCALE, + GLX_STATIC_COLOR, GLX_PSEUDO_COLOR, + GLX_TRUE_COLOR, GLX_DIRECT_COLOR + }; + + return ((unsigned) visualType < NUM_VISUAL_TYPES) + ? glx_visual_types[visualType] : GLX_NONE; +} + +_X_HIDDEN char * +__glXQueryServerString(Display * dpy, int opcode, CARD32 screen, CARD32 name) +{ + xGLXGenericGetStringReq *req; + xGLXSingleReply reply; + int length; + int numbytes; + char *buf; + CARD32 for_whom = screen; + CARD32 glxCode = X_GLXQueryServerString; + + + LockDisplay(dpy); + + + /* All of the GLX protocol requests for getting a string from the server + * look the same. The exact meaning of the for_whom field is usually + * either the screen number (for glXQueryServerString) or the context tag + * (for GLXSingle). + */ + + GetReq(GLXGenericGetString, req); + req->reqType = opcode; + req->glxCode = glxCode; + req->for_whom = for_whom; + req->name = name; + + _XReply(dpy, (xReply *) & reply, 0, False); + + length = reply.length * 4; + numbytes = reply.size; + + buf = (char *) Xmalloc(numbytes); + if (buf != NULL) { + _XRead(dpy, buf, numbytes); + length -= numbytes; + } + + _XEatData(dpy, length); + + UnlockDisplay(dpy); + SyncHandle(); + + return buf; +} + +/************************************************************************/ +/* +** Free the per screen configs data as well as the array of +** __glXScreenConfigs. +*/ +static void +FreeScreenConfigs(__GLXdisplayPrivate * priv) +{ + __GLXscreenConfigs *psc; + GLint i, screens; + + /* Free screen configuration information */ + psc = priv->screenConfigs; + screens = ScreenCount(priv->dpy); + for (i = 0; i < screens; i++, psc++) { + if (psc->configs) { + x11_context_modes_destroy(psc->configs); + psc->configs = NULL; /* NOTE: just for paranoia */ + } + if (psc->visuals) { + x11_context_modes_destroy(psc->visuals); + psc->visuals = NULL; /* NOTE: just for paranoia */ + } + Xfree((char *) psc->serverGLXexts); + } + XFree((char *) priv->screenConfigs); + priv->screenConfigs = NULL; +} + +/************************************************************************/ + +/* +** Query the version of the GLX extension. This procedure works even if +** the client extension is not completely set up. +*/ +static Bool +QueryVersion(Display * dpy, int opcode, int *major, int *minor) +{ + xGLXQueryVersionReq *req; + xGLXQueryVersionReply reply; + + /* Send the glXQueryVersion request */ + LockDisplay(dpy); + GetReq(GLXQueryVersion, req); + req->reqType = opcode; + req->glxCode = X_GLXQueryVersion; + req->majorVersion = GLX_MAJOR_VERSION; + req->minorVersion = GLX_MINOR_VERSION; + _XReply(dpy, (xReply *) & reply, 0, False); + UnlockDisplay(dpy); + SyncHandle(); + + if (reply.majorVersion != GLX_MAJOR_VERSION) { + /* + ** The server does not support the same major release as this + ** client. + */ + return GL_FALSE; + } + *major = reply.majorVersion; + *minor = min(reply.minorVersion, GLX_MINOR_VERSION); + return GL_TRUE; +} + +_X_HIDDEN void +__glXInitializeVisualConfigFromTags(__GLcontextModes * config, int count, + const INT32 * bp, Bool tagged_only, + Bool fbconfig_style_tags) +{ + int i; + + if (!tagged_only) { + /* Copy in the first set of properties */ + config->visualID = *bp++; + + config->visualType = _gl_convert_from_x_visual_type(*bp++); + + config->rgbMode = *bp++; + + config->redBits = *bp++; + config->greenBits = *bp++; + config->blueBits = *bp++; + config->alphaBits = *bp++; + config->accumRedBits = *bp++; + config->accumGreenBits = *bp++; + config->accumBlueBits = *bp++; + config->accumAlphaBits = *bp++; + + config->doubleBufferMode = *bp++; + config->stereoMode = *bp++; + + config->rgbBits = *bp++; + config->depthBits = *bp++; + config->stencilBits = *bp++; + config->numAuxBuffers = *bp++; + config->level = *bp++; + + count -= __GLX_MIN_CONFIG_PROPS; + } + + /* + ** Additional properties may be in a list at the end + ** of the reply. They are in pairs of property type + ** and property value. + */ + +#define FETCH_OR_SET(tag) \ + config-> tag = ( fbconfig_style_tags ) ? *bp++ : 1 + + for (i = 0; i < count; i += 2) { + switch (*bp++) { + case GLX_RGBA: + FETCH_OR_SET(rgbMode); + break; + case GLX_BUFFER_SIZE: + config->rgbBits = *bp++; + break; + case GLX_LEVEL: + config->level = *bp++; + break; + case GLX_DOUBLEBUFFER: + FETCH_OR_SET(doubleBufferMode); + break; + case GLX_STEREO: + FETCH_OR_SET(stereoMode); + break; + case GLX_AUX_BUFFERS: + config->numAuxBuffers = *bp++; + break; + case GLX_RED_SIZE: + config->redBits = *bp++; + break; + case GLX_GREEN_SIZE: + config->greenBits = *bp++; + break; + case GLX_BLUE_SIZE: + config->blueBits = *bp++; + break; + case GLX_ALPHA_SIZE: + config->alphaBits = *bp++; + break; + case GLX_DEPTH_SIZE: + config->depthBits = *bp++; + break; + case GLX_STENCIL_SIZE: + config->stencilBits = *bp++; + break; + case GLX_ACCUM_RED_SIZE: + config->accumRedBits = *bp++; + break; + case GLX_ACCUM_GREEN_SIZE: + config->accumGreenBits = *bp++; + break; + case GLX_ACCUM_BLUE_SIZE: + config->accumBlueBits = *bp++; + break; + case GLX_ACCUM_ALPHA_SIZE: + config->accumAlphaBits = *bp++; + break; + case GLX_VISUAL_CAVEAT_EXT: + config->visualRating = *bp++; + break; + case GLX_X_VISUAL_TYPE: + config->visualType = *bp++; + break; + case GLX_TRANSPARENT_TYPE: + config->transparentPixel = *bp++; + break; + case GLX_TRANSPARENT_INDEX_VALUE: + config->transparentIndex = *bp++; + break; + case GLX_TRANSPARENT_RED_VALUE: + config->transparentRed = *bp++; + break; + case GLX_TRANSPARENT_GREEN_VALUE: + config->transparentGreen = *bp++; + break; + case GLX_TRANSPARENT_BLUE_VALUE: + config->transparentBlue = *bp++; + break; + case GLX_TRANSPARENT_ALPHA_VALUE: + config->transparentAlpha = *bp++; + break; + case GLX_VISUAL_ID: + config->visualID = *bp++; + break; + case GLX_DRAWABLE_TYPE: + config->drawableType = *bp++; + break; + case GLX_RENDER_TYPE: + config->renderType = *bp++; + break; + case GLX_X_RENDERABLE: + config->xRenderable = *bp++; + break; + case GLX_FBCONFIG_ID: + config->fbconfigID = *bp++; + break; + case GLX_MAX_PBUFFER_WIDTH: + config->maxPbufferWidth = *bp++; + break; + case GLX_MAX_PBUFFER_HEIGHT: + config->maxPbufferHeight = *bp++; + break; + case GLX_MAX_PBUFFER_PIXELS: + config->maxPbufferPixels = *bp++; + break; + case GLX_OPTIMAL_PBUFFER_WIDTH_SGIX: + config->optimalPbufferWidth = *bp++; + break; + case GLX_OPTIMAL_PBUFFER_HEIGHT_SGIX: + config->optimalPbufferHeight = *bp++; + break; + case GLX_VISUAL_SELECT_GROUP_SGIX: + config->visualSelectGroup = *bp++; + break; + case GLX_SWAP_METHOD_OML: + config->swapMethod = *bp++; + break; + case GLX_SAMPLE_BUFFERS_SGIS: + config->sampleBuffers = *bp++; + break; + case GLX_SAMPLES_SGIS: + config->samples = *bp++; + break; + case GLX_BIND_TO_TEXTURE_RGB_EXT: + config->bindToTextureRgb = *bp++; + break; + case GLX_BIND_TO_TEXTURE_RGBA_EXT: + config->bindToTextureRgba = *bp++; + break; + case GLX_BIND_TO_MIPMAP_TEXTURE_EXT: + config->bindToMipmapTexture = *bp++; + break; + case GLX_BIND_TO_TEXTURE_TARGETS_EXT: + config->bindToTextureTargets = *bp++; + break; + case GLX_Y_INVERTED_EXT: + config->yInverted = *bp++; + break; + case None: + i = count; + break; + default: + break; + } + } + + config->renderType = + (config->rgbMode) ? GLX_RGBA_BIT : GLX_COLOR_INDEX_BIT; + + config->haveAccumBuffer = ((config->accumRedBits + + config->accumGreenBits + + config->accumBlueBits + + config->accumAlphaBits) > 0); + config->haveDepthBuffer = (config->depthBits > 0); + config->haveStencilBuffer = (config->stencilBits > 0); +} + +static __GLcontextModes * +createConfigsFromProperties(Display * dpy, int nvisuals, int nprops, + int screen, GLboolean tagged_only) +{ + INT32 buf[__GLX_TOTAL_CONFIG], *props; + unsigned prop_size; + __GLcontextModes *modes, *m; + int i; + + if (nprops == 0) + return NULL; + + /* FIXME: Is the __GLX_MIN_CONFIG_PROPS test correct for FBconfigs? */ + + /* Check number of properties */ + if (nprops < __GLX_MIN_CONFIG_PROPS || nprops > __GLX_MAX_CONFIG_PROPS) + return NULL; + + /* Allocate memory for our config structure */ + modes = x11_context_modes_create(nvisuals); + if (!modes) + return NULL; + + prop_size = nprops * __GLX_SIZE_INT32; + if (prop_size <= sizeof(buf)) + props = buf; + else + props = Xmalloc(prop_size); + + /* Read each config structure and convert it into our format */ + m = modes; + for (i = 0; i < nvisuals; i++) { + _XRead(dpy, (char *) props, prop_size); + /* Older X servers don't send this so we default it here. */ + m->drawableType = GLX_WINDOW_BIT; + __glXInitializeVisualConfigFromTags(m, nprops, props, + tagged_only, GL_TRUE); + m->screen = screen; + m = m->next; + } + + if (props != buf) + Xfree(props); + + return modes; +} + +static GLboolean +getVisualConfigs(Display * dpy, __GLXdisplayPrivate * priv, int screen) +{ + xGLXGetVisualConfigsReq *req; + __GLXscreenConfigs *psc; + xGLXGetVisualConfigsReply reply; + + LockDisplay(dpy); + + psc = priv->screenConfigs + screen; + psc->visuals = NULL; + GetReq(GLXGetVisualConfigs, req); + req->reqType = priv->majorOpcode; + req->glxCode = X_GLXGetVisualConfigs; + req->screen = screen; + + if (!_XReply(dpy, (xReply *) & reply, 0, False)) + goto out; + + psc->visuals = createConfigsFromProperties(dpy, + reply.numVisuals, + reply.numProps, + screen, GL_FALSE); + + out: + UnlockDisplay(dpy); + return psc->visuals != NULL; +} + +static GLboolean +getFBConfigs(Display * dpy, __GLXdisplayPrivate * priv, int screen) +{ + xGLXGetFBConfigsReq *fb_req; + xGLXGetFBConfigsSGIXReq *sgi_req; + xGLXVendorPrivateWithReplyReq *vpreq; + xGLXGetFBConfigsReply reply; + __GLXscreenConfigs *psc; + + psc = priv->screenConfigs + screen; + psc->serverGLXexts = + __glXQueryServerString(dpy, priv->majorOpcode, screen, GLX_EXTENSIONS); + + LockDisplay(dpy); + + psc->configs = NULL; + if (atof(priv->serverGLXversion) >= 1.3) { + GetReq(GLXGetFBConfigs, fb_req); + fb_req->reqType = priv->majorOpcode; + fb_req->glxCode = X_GLXGetFBConfigs; + fb_req->screen = screen; + } + else if (strstr(psc->serverGLXexts, "GLX_SGIX_fbconfig") != NULL) { + GetReqExtra(GLXVendorPrivateWithReply, + sz_xGLXGetFBConfigsSGIXReq + + sz_xGLXVendorPrivateWithReplyReq, vpreq); + sgi_req = (xGLXGetFBConfigsSGIXReq *) vpreq; + sgi_req->reqType = priv->majorOpcode; + sgi_req->glxCode = X_GLXVendorPrivateWithReply; + sgi_req->vendorCode = X_GLXvop_GetFBConfigsSGIX; + sgi_req->screen = screen; + } + else + goto out; + + if (!_XReply(dpy, (xReply *) & reply, 0, False)) + goto out; + + psc->configs = createConfigsFromProperties(dpy, + reply.numFBConfigs, + reply.numAttribs * 2, + screen, GL_TRUE); + + out: + UnlockDisplay(dpy); + return psc->configs != NULL; +} + +static GLboolean +AllocAndFetchScreenConfigs(Display * dpy, __GLXdisplayPrivate * priv) +{ + __GLXscreenConfigs *psc; + GLint i, screens; + + /* + ** First allocate memory for the array of per screen configs. + */ + screens = ScreenCount(dpy); + psc = (__GLXscreenConfigs *) Xmalloc(screens * sizeof(__GLXscreenConfigs)); + if (!psc) { + return GL_FALSE; + } + memset(psc, 0, screens * sizeof(__GLXscreenConfigs)); + priv->screenConfigs = psc; + + priv->serverGLXversion = + __glXQueryServerString(dpy, priv->majorOpcode, 0, GLX_VERSION); + if (priv->serverGLXversion == NULL) { + FreeScreenConfigs(priv); + return GL_FALSE; + } + + for (i = 0; i < screens; i++, psc++) { + getFBConfigs(dpy, priv, i); + getVisualConfigs(dpy, priv, i); + psc->scr = i; + psc->dpy = dpy; + } + + SyncHandle(); + + return GL_TRUE; +} + +_X_HIDDEN void +__glXRelease(__GLXdisplayPrivate *dpyPriv) +{ + FreeScreenConfigs(dpyPriv); + + if (dpyPriv->serverGLXvendor) { + Xfree((char *) dpyPriv->serverGLXvendor); + dpyPriv->serverGLXvendor = NULL; + } + if (dpyPriv->serverGLXversion) { + Xfree((char *) dpyPriv->serverGLXversion); + dpyPriv->serverGLXversion = NULL; + } + + Xfree(dpyPriv); +} + +_X_HIDDEN __GLXdisplayPrivate * +__glXInitialize(Display * dpy) +{ + XExtDisplayInfo *info = __glXFindDisplay(dpy); + __GLXdisplayPrivate *dpyPriv; + int major, minor; + + if (!XextHasExtension(info)) + return NULL; + + /* See if the versions are compatible */ + if (!QueryVersion(dpy, info->codes->major_opcode, &major, &minor)) + return NULL; + + dpyPriv = (__GLXdisplayPrivate *) Xcalloc(1, sizeof(__GLXdisplayPrivate)); + if (!dpyPriv) + return NULL; + + /* + ** Init the display private and then read in the screen config + ** structures from the server. + */ + dpyPriv->majorOpcode = info->codes->major_opcode; + dpyPriv->majorVersion = major; + dpyPriv->minorVersion = minor; + dpyPriv->dpy = dpy; + + dpyPriv->serverGLXvendor = NULL; + dpyPriv->serverGLXversion = NULL; + + if (!AllocAndFetchScreenConfigs(dpy, dpyPriv)) { + Xfree(dpyPriv); + return NULL; + } + + return dpyPriv; +} diff --git a/src/gallium/state_trackers/egl_g3d/x11/glxinit.h b/src/gallium/state_trackers/egl_g3d/x11/glxinit.h new file mode 100644 index 00000000000..515a8252220 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/glxinit.h @@ -0,0 +1,14 @@ +#ifndef GLXINIT_INCLUDED +#define GLXINIT_INCLUDED + +#include <X11/Xlib.h> + +#ifndef GLX_DIRECT_RENDERING +#define GLX_DIRECT_RENDERING +#endif +#include "glxclient.h" + +extern void +__glXRelease(__GLXdisplayPrivate *dpyPriv); + +#endif /* GLXINIT_INCLUDED */ diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c b/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c new file mode 100644 index 00000000000..f497d8c1c77 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c @@ -0,0 +1,690 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "pipe/p_compiler.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "state_tracker/drm_api.h" +#include "egllog.h" + +#include "native_x11.h" +#include "x11_screen.h" + +enum dri2_surface_type { + DRI2_SURFACE_TYPE_WINDOW, + DRI2_SURFACE_TYPE_PIXMAP, + DRI2_SURFACE_TYPE_PBUFFER +}; + +struct dri2_display { + struct native_display base; + Display *dpy; + boolean own_dpy; + + struct drm_api *api; + struct x11_screen *xscr; + int xscr_number; + + struct dri2_config *configs; + int num_configs; +}; + +struct dri2_surface { + struct native_surface base; + Drawable drawable; + enum dri2_surface_type type; + enum pipe_format color_format; + struct dri2_display *dri2dpy; + + struct pipe_texture *pbuffer_textures[NUM_NATIVE_ATTACHMENTS]; + boolean have_back, have_fake; + int width, height; + unsigned int sequence_number; +}; + +struct dri2_config { + struct native_config base; +}; + +static INLINE struct dri2_display * +dri2_display(const struct native_display *ndpy) +{ + return (struct dri2_display *) ndpy; +} + +static INLINE struct dri2_surface * +dri2_surface(const struct native_surface *nsurf) +{ + return (struct dri2_surface *) nsurf; +} + +static INLINE struct dri2_config * +dri2_config(const struct native_config *nconf) +{ + return (struct dri2_config *) nconf; +} + +static boolean +dri2_surface_flush_frontbuffer(struct native_surface *nsurf) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + struct dri2_display *dri2dpy = dri2surf->dri2dpy; + + /* pbuffer is private */ + if (dri2surf->type == DRI2_SURFACE_TYPE_PBUFFER) + return TRUE; + + /* copy to real front buffer */ + if (dri2surf->have_fake) + x11_drawable_copy_buffers(dri2dpy->xscr, dri2surf->drawable, + 0, 0, dri2surf->width, dri2surf->height, + DRI2BufferFakeFrontLeft, DRI2BufferFrontLeft); + + return TRUE; +} + +static boolean +dri2_surface_swap_buffers(struct native_surface *nsurf) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + struct dri2_display *dri2dpy = dri2surf->dri2dpy; + + /* pbuffer is private */ + if (dri2surf->type == DRI2_SURFACE_TYPE_PBUFFER) + return TRUE; + + /* copy to front buffer */ + if (dri2surf->have_back) + x11_drawable_copy_buffers(dri2dpy->xscr, dri2surf->drawable, + 0, 0, dri2surf->width, dri2surf->height, + DRI2BufferBackLeft, DRI2BufferFrontLeft); + + /* and update fake front buffer */ + if (dri2surf->have_fake) + x11_drawable_copy_buffers(dri2dpy->xscr, dri2surf->drawable, + 0, 0, dri2surf->width, dri2surf->height, + DRI2BufferFrontLeft, DRI2BufferFakeFrontLeft); + + return TRUE; +} + +static boolean +dri2_surface_validate(struct native_surface *nsurf, + const enum native_attachment *natts, + unsigned num_natts, + unsigned int *seq_num, + struct pipe_texture **textures, + int *width, int *height) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + struct dri2_display *dri2dpy = dri2surf->dri2dpy; + unsigned int dri2atts[NUM_NATIVE_ATTACHMENTS]; + EGLint texture_indices[NUM_NATIVE_ATTACHMENTS]; + struct pipe_texture templ; + struct x11_drawable_buffer *xbufs; + int num_ins, num_outs, i; + + if (num_natts) { + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.last_level = 0; + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + templ.depth0 = 1; + templ.format = dri2surf->color_format; + templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; + + if (textures) + memset(textures, 0, sizeof(*textures) * num_natts); + } + + /* create textures for pbuffer */ + if (dri2surf->type == DRI2_SURFACE_TYPE_PBUFFER) { + struct pipe_screen *screen = dri2dpy->base.screen; + + for (i = 0; i < num_natts; i++) { + enum native_attachment natt = natts[i]; + struct pipe_texture *ptex = dri2surf->pbuffer_textures[natt]; + + if (!ptex) { + ptex = screen->texture_create(screen, &templ); + dri2surf->pbuffer_textures[natt] = ptex; + } + + if (textures) + pipe_texture_reference(&textures[i], ptex); + } + + if (seq_num) + *seq_num = dri2surf->sequence_number; + if (width) + *width = dri2surf->width; + if (height) + *height = dri2surf->height; + + return TRUE; + } + + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) + texture_indices[i] = -1; + + /* prepare the attachments */ + num_ins = num_natts; + for (i = 0; i < num_natts; i++) { + unsigned int dri2att; + + switch (natts[i]) { + case NATIVE_ATTACHMENT_FRONT_LEFT: + dri2att = DRI2BufferFrontLeft; + break; + case NATIVE_ATTACHMENT_BACK_LEFT: + dri2att = DRI2BufferBackLeft; + break; + case NATIVE_ATTACHMENT_FRONT_RIGHT: + dri2att = DRI2BufferFrontRight; + break; + case NATIVE_ATTACHMENT_BACK_RIGHT: + dri2att = DRI2BufferBackRight; + break; + default: + assert(0); + dri2att = 0; + break; + } + dri2atts[i] = dri2att; + texture_indices[natts[i]] = i; + } + + dri2surf->have_back = FALSE; + dri2surf->have_fake = FALSE; + + /* remember old geometry */ + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + + xbufs = x11_drawable_get_buffers(dri2dpy->xscr, dri2surf->drawable, + &dri2surf->width, &dri2surf->height, + dri2atts, FALSE, num_ins, &num_outs); + if (!xbufs) + return FALSE; + + if (templ.width0 != dri2surf->width || templ.height0 != dri2surf->height) { + /* are there cases where the buffers change and the geometry doesn't? */ + dri2surf->sequence_number++; + + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + } + + for (i = 0; i < num_outs; i++) { + struct x11_drawable_buffer *xbuf = &xbufs[i]; + const char *desc; + enum native_attachment natt; + + switch (xbuf->attachment) { + case DRI2BufferFrontLeft: + natt = NATIVE_ATTACHMENT_FRONT_LEFT; + desc = "DRI2 Front Buffer"; + break; + case DRI2BufferFakeFrontLeft: + natt = NATIVE_ATTACHMENT_FRONT_LEFT; + desc = "DRI2 Fake Front Buffer"; + dri2surf->have_fake = TRUE; + break; + case DRI2BufferBackLeft: + natt = NATIVE_ATTACHMENT_BACK_LEFT; + desc = "DRI2 Back Buffer"; + dri2surf->have_back = TRUE; + break; + default: + desc = NULL; + break; + } + + if (!desc || texture_indices[natt] < 0 || + (textures && textures[texture_indices[natt]])) { + if (!desc) + _eglLog(_EGL_WARNING, "unknown buffer %d", xbuf->attachment); + else if (texture_indices[natt] < 0) + _eglLog(_EGL_WARNING, "unexpected buffer %d", xbuf->attachment); + else if (textures && textures[texture_indices[natt]]) + _eglLog(_EGL_WARNING, "both real and fake front buffers are listed"); + continue; + } + + if (textures) { + struct pipe_texture *ptex = + dri2dpy->api->texture_from_shared_handle(dri2dpy->api, + dri2dpy->base.screen, &templ, + desc, xbuf->pitch, xbuf->name); + if (ptex) { + /* the caller owns the textures */ + textures[texture_indices[natt]] = ptex; + } + } + } + + free(xbufs); + + if (seq_num) + *seq_num = dri2surf->sequence_number; + if (width) + *width = dri2surf->width; + if (height) + *height = dri2surf->height; + + return TRUE; +} + +static void +dri2_surface_wait(struct native_surface *nsurf) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + struct dri2_display *dri2dpy = dri2surf->dri2dpy; + + if (dri2surf->have_fake) { + x11_drawable_copy_buffers(dri2dpy->xscr, dri2surf->drawable, + 0, 0, dri2surf->width, dri2surf->height, + DRI2BufferFrontLeft, DRI2BufferFakeFrontLeft); + } +} + +static void +dri2_surface_destroy(struct native_surface *nsurf) +{ + struct dri2_surface *dri2surf = dri2_surface(nsurf); + int i; + + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { + struct pipe_texture *ptex = dri2surf->pbuffer_textures[i]; + pipe_texture_reference(&ptex, NULL); + } + + if (dri2surf->drawable) + x11_drawable_enable_dri2(dri2surf->dri2dpy->xscr, + dri2surf->drawable, FALSE); + free(dri2surf); +} + +static struct dri2_surface * +dri2_display_create_surface(struct native_display *ndpy, + enum dri2_surface_type type, + Drawable drawable, + const struct native_config *nconf) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + struct dri2_config *dri2conf = dri2_config(nconf); + struct dri2_surface *dri2surf; + + dri2surf = CALLOC_STRUCT(dri2_surface); + if (!dri2surf) + return NULL; + + if (drawable) + x11_drawable_enable_dri2(dri2dpy->xscr, drawable, TRUE); + + dri2surf->dri2dpy = dri2dpy; + dri2surf->type = type; + dri2surf->drawable = drawable; + dri2surf->color_format = dri2conf->base.color_format; + + dri2surf->base.destroy = dri2_surface_destroy; + dri2surf->base.swap_buffers = dri2_surface_swap_buffers; + dri2surf->base.flush_frontbuffer = dri2_surface_flush_frontbuffer; + dri2surf->base.validate = dri2_surface_validate; + dri2surf->base.wait = dri2_surface_wait; + + return dri2surf; +} + +static struct native_surface * +dri2_display_create_window_surface(struct native_display *ndpy, + EGLNativeWindowType win, + const struct native_config *nconf) +{ + struct dri2_surface *dri2surf; + + dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_WINDOW, + (Drawable) win, nconf); + return (dri2surf) ? &dri2surf->base : NULL; +} + +static struct native_surface * +dri2_display_create_pixmap_surface(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct dri2_surface *dri2surf; + + dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_PIXMAP, + (Drawable) pix, nconf); + return (dri2surf) ? &dri2surf->base : NULL; +} + +static struct native_surface * +dri2_display_create_pbuffer_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct dri2_surface *dri2surf; + + dri2surf = dri2_display_create_surface(ndpy, DRI2_SURFACE_TYPE_PBUFFER, + (Drawable) None, nconf); + if (dri2surf) { + dri2surf->width = width; + dri2surf->height = height; + } + return (dri2surf) ? &dri2surf->base : NULL; +} + +static struct pipe_context * +dri2_display_create_context(struct native_display *ndpy, void *context_private) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + struct pipe_context *pctx; + + pctx = dri2dpy->api->create_context(dri2dpy->api, dri2dpy->base.screen); + if (pctx) + pctx->priv = context_private; + return pctx; +} + +static int +choose_color_format(const __GLcontextModes *mode, enum pipe_format formats[32]) +{ + int count = 0; + + switch (mode->rgbBits) { + case 32: + formats[count++] = PIPE_FORMAT_A8R8G8B8_UNORM; + formats[count++] = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + case 24: + formats[count++] = PIPE_FORMAT_X8R8G8B8_UNORM; + formats[count++] = PIPE_FORMAT_B8G8R8X8_UNORM; + formats[count++] = PIPE_FORMAT_A8R8G8B8_UNORM; + formats[count++] = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + case 16: + formats[count++] = PIPE_FORMAT_R5G6B5_UNORM; + break; + default: + break; + } + + return count; +} + +static int +choose_depth_stencil_format(const __GLcontextModes *mode, + enum pipe_format formats[32]) +{ + int count = 0; + + switch (mode->depthBits) { + case 32: + formats[count++] = PIPE_FORMAT_Z32_UNORM; + break; + case 24: + if (mode->stencilBits) { + formats[count++] = PIPE_FORMAT_S8Z24_UNORM; + formats[count++] = PIPE_FORMAT_Z24S8_UNORM; + } + else { + formats[count++] = PIPE_FORMAT_X8Z24_UNORM; + formats[count++] = PIPE_FORMAT_Z24X8_UNORM; + } + break; + case 16: + formats[count++] = PIPE_FORMAT_Z16_UNORM; + break; + default: + break; + } + + return count; +} + +static boolean +is_format_supported(struct pipe_screen *screen, + enum pipe_format fmt, boolean is_color) +{ + return screen->is_format_supported(screen, fmt, PIPE_TEXTURE_2D, + (is_color) ? PIPE_TEXTURE_USAGE_RENDER_TARGET : + PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0); +} + +static boolean +dri2_display_convert_config(struct native_display *ndpy, + const __GLcontextModes *mode, + struct native_config *nconf) +{ + enum pipe_format formats[32]; + int num_formats, i; + + if (!(mode->renderType & GLX_RGBA_BIT) || !mode->rgbMode) + return FALSE; + + /* skip single-buffered configs */ + if (!mode->doubleBufferMode) + return FALSE; + + nconf->mode = *mode; + nconf->mode.renderType = GLX_RGBA_BIT; + nconf->mode.rgbMode = TRUE; + /* pbuffer is allocated locally and is always supported */ + nconf->mode.drawableType |= GLX_PBUFFER_BIT; + /* the swap method is always copy */ + nconf->mode.swapMethod = GLX_SWAP_COPY_OML; + + /* fix up */ + nconf->mode.rgbBits = + nconf->mode.redBits + nconf->mode.greenBits + + nconf->mode.blueBits + nconf->mode.alphaBits; + if (!(nconf->mode.drawableType & GLX_WINDOW_BIT)) { + nconf->mode.visualID = 0; + nconf->mode.visualType = GLX_NONE; + } + if (!(nconf->mode.drawableType & GLX_PBUFFER_BIT)) { + nconf->mode.bindToTextureRgb = FALSE; + nconf->mode.bindToTextureRgba = FALSE; + } + + nconf->color_format = PIPE_FORMAT_NONE; + nconf->depth_format = PIPE_FORMAT_NONE; + nconf->stencil_format = PIPE_FORMAT_NONE; + + /* choose color format */ + num_formats = choose_color_format(mode, formats); + for (i = 0; i < num_formats; i++) { + if (is_format_supported(ndpy->screen, formats[i], TRUE)) { + nconf->color_format = formats[i]; + break; + } + } + if (nconf->color_format == PIPE_FORMAT_NONE) + return FALSE; + + /* choose depth/stencil format */ + num_formats = choose_depth_stencil_format(mode, formats); + for (i = 0; i < num_formats; i++) { + if (is_format_supported(ndpy->screen, formats[i], FALSE)) { + nconf->depth_format = formats[i]; + nconf->stencil_format = formats[i]; + break; + } + } + if ((nconf->mode.depthBits && nconf->depth_format == PIPE_FORMAT_NONE) || + (nconf->mode.stencilBits && nconf->stencil_format == PIPE_FORMAT_NONE)) + return FALSE; + + return TRUE; +} + +static const struct native_config ** +dri2_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + const struct native_config **configs; + int i; + + /* first time */ + if (!dri2dpy->configs) { + const __GLcontextModes *modes; + int num_modes, count; + + modes = x11_screen_get_glx_configs(dri2dpy->xscr); + if (!modes) + return NULL; + num_modes = x11_context_modes_count(modes); + + dri2dpy->configs = calloc(num_modes, sizeof(*dri2dpy->configs)); + if (!dri2dpy->configs) + return NULL; + + count = 0; + for (i = 0; i < num_modes; i++) { + struct native_config *nconf = &dri2dpy->configs[count].base; + if (dri2_display_convert_config(&dri2dpy->base, modes, nconf)) + count++; + modes = modes->next; + } + + dri2dpy->num_configs = count; + } + + configs = malloc(dri2dpy->num_configs * sizeof(*configs)); + if (configs) { + for (i = 0; i < dri2dpy->num_configs; i++) + configs[i] = (const struct native_config *) &dri2dpy->configs[i]; + if (num_configs) + *num_configs = dri2dpy->num_configs; + } + + return configs; +} + +static void +dri2_display_destroy(struct native_display *ndpy) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + + if (dri2dpy->configs) + free(dri2dpy->configs); + + if (dri2dpy->base.screen) + dri2dpy->base.screen->destroy(dri2dpy->base.screen); + + if (dri2dpy->xscr) + x11_screen_destroy(dri2dpy->xscr); + if (dri2dpy->own_dpy) + XCloseDisplay(dri2dpy->dpy); + if (dri2dpy->api && dri2dpy->api->destroy) + dri2dpy->api->destroy(dri2dpy->api); + free(dri2dpy); +} + +/** + * Initialize DRI2 and pipe screen. + */ +static boolean +dri2_display_init_screen(struct native_display *ndpy) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + const char *driver = dri2dpy->api->name; + struct drm_create_screen_arg arg; + int fd; + + if (!x11_screen_support(dri2dpy->xscr, X11_SCREEN_EXTENSION_DRI2) || + !x11_screen_support(dri2dpy->xscr, X11_SCREEN_EXTENSION_GLX)) { + _eglLog(_EGL_WARNING, "GLX/DRI2 is not supported"); + return FALSE; + } + + fd = x11_screen_enable_dri2(dri2dpy->xscr, driver); + if (fd < 0) + return FALSE; + + memset(&arg, 0, sizeof(arg)); + arg.mode = DRM_CREATE_NORMAL; + dri2dpy->base.screen = dri2dpy->api->create_screen(dri2dpy->api, fd, &arg); + if (!dri2dpy->base.screen) { + _eglLog(_EGL_WARNING, "failed to create DRM screen"); + return FALSE; + } + + return TRUE; +} + +struct native_display * +x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api) +{ + struct dri2_display *dri2dpy; + + dri2dpy = CALLOC_STRUCT(dri2_display); + if (!dri2dpy) + return NULL; + + dri2dpy->api = api; + if (!dri2dpy->api) { + _eglLog(_EGL_WARNING, "failed to create DRM API"); + free(dri2dpy); + return NULL; + } + + dri2dpy->dpy = dpy; + if (!dri2dpy->dpy) { + dri2dpy->dpy = XOpenDisplay(NULL); + if (!dri2dpy->dpy) { + dri2_display_destroy(&dri2dpy->base); + return NULL; + } + dri2dpy->own_dpy = TRUE; + } + + dri2dpy->xscr_number = DefaultScreen(dri2dpy->dpy); + dri2dpy->xscr = x11_screen_create(dri2dpy->dpy, dri2dpy->xscr_number); + if (!dri2dpy->xscr) { + dri2_display_destroy(&dri2dpy->base); + return NULL; + } + + if (!dri2_display_init_screen(&dri2dpy->base)) { + dri2_display_destroy(&dri2dpy->base); + return NULL; + } + + dri2dpy->base.destroy = dri2_display_destroy; + dri2dpy->base.get_configs = dri2_display_get_configs; + dri2dpy->base.create_context = dri2_display_create_context; + dri2dpy->base.create_window_surface = dri2_display_create_window_surface; + dri2dpy->base.create_pixmap_surface = dri2_display_create_pixmap_surface; + dri2dpy->base.create_pbuffer_surface = dri2_display_create_pbuffer_surface; + + return &dri2dpy->base; +} diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_x11.c b/src/gallium/state_trackers/egl_g3d/x11/native_x11.c new file mode 100644 index 00000000000..583ce3d3293 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/native_x11.c @@ -0,0 +1,72 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <string.h> +#include "util/u_debug.h" +#include "state_tracker/drm_api.h" +#include "egllog.h" + +#include "native_x11.h" + +static struct drm_api *api; + +const char * +native_get_name(void) +{ + static char x11_name[32]; + + if (!api) + api = drm_api_create(); + + if (api) + snprintf(x11_name, sizeof(x11_name), "X11/%s", api->name); + else + snprintf(x11_name, sizeof(x11_name), "X11"); + + return x11_name; +} + +struct native_display * +native_create_display(EGLNativeDisplayType dpy) +{ + struct native_display *ndpy = NULL; + boolean force_sw; + + if (!api) + api = drm_api_create(); + + force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE); + if (api && !force_sw) { + ndpy = x11_create_dri2_display(dpy, api); + } + + if (!ndpy) { + EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING; + + _eglLog(level, "use software fallback"); + ndpy = x11_create_ximage_display(dpy, TRUE); + } + + return ndpy; +} diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_x11.h b/src/gallium/state_trackers/egl_g3d/x11/native_x11.h new file mode 100644 index 00000000000..622ddac5df6 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/native_x11.h @@ -0,0 +1,37 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NATIVE_X11_H_ +#define _NATIVE_X11_H_ + +#include "state_tracker/drm_api.h" +#include "common/native.h" + +struct native_display * +x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm); + +struct native_display * +x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api); + +#endif /* _NATIVE_X11_H_ */ diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c b/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c new file mode 100644 index 00000000000..24a50df7a0a --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c @@ -0,0 +1,676 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <sys/ipc.h> +#include <sys/types.h> +#include <sys/shm.h> +#include <X11/Xlib.h> +#include <X11/Xutil.h> +#include <X11/extensions/XShm.h> +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "pipe/p_compiler.h" +#include "pipe/internal/p_winsys_screen.h" +#include "softpipe/sp_winsys.h" +#include "egllog.h" + +#include "sw_winsys.h" +#include "native_x11.h" +#include "x11_screen.h" + +enum ximage_surface_type { + XIMAGE_SURFACE_TYPE_WINDOW, + XIMAGE_SURFACE_TYPE_PIXMAP, + XIMAGE_SURFACE_TYPE_PBUFFER +}; + +struct ximage_display { + struct native_display base; + Display *dpy; + boolean own_dpy; + + struct x11_screen *xscr; + int xscr_number; + + boolean use_xshm; + + struct pipe_winsys *winsys; + struct ximage_config *configs; + int num_configs; +}; + +struct ximage_buffer { + XImage *ximage; + + struct pipe_texture *texture; + struct pipe_transfer *transfer; + XShmSegmentInfo *shm_info; + boolean xshm_attached; +}; + +struct ximage_surface { + struct native_surface base; + Drawable drawable; + enum ximage_surface_type type; + enum pipe_format color_format; + XVisualInfo visual; + struct ximage_display *xdpy; + + int width, height; + GC gc; + + struct ximage_buffer buffers[NUM_NATIVE_ATTACHMENTS]; + unsigned int sequence_number; +}; + +struct ximage_config { + struct native_config base; + const XVisualInfo *visual; +}; + +static INLINE struct ximage_display * +ximage_display(const struct native_display *ndpy) +{ + return (struct ximage_display *) ndpy; +} + +static INLINE struct ximage_surface * +ximage_surface(const struct native_surface *nsurf) +{ + return (struct ximage_surface *) nsurf; +} + +static INLINE struct ximage_config * +ximage_config(const struct native_config *nconf) +{ + return (struct ximage_config *) nconf; +} + +static void +ximage_surface_free_buffer(struct native_surface *nsurf, + enum native_attachment which) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + struct ximage_buffer *xbuf = &xsurf->buffers[which]; + struct pipe_screen *screen = xsurf->xdpy->base.screen; + + if (xbuf->transfer) { + screen->tex_transfer_destroy(xbuf->transfer); + xbuf->transfer = NULL; + } + pipe_texture_reference(&xbuf->texture, NULL); + + if (xbuf->shm_info) { + if (xbuf->xshm_attached) + XShmDetach(xsurf->xdpy->dpy, xbuf->shm_info); + if (xbuf->shm_info->shmaddr != (void *) -1) + shmdt(xbuf->shm_info->shmaddr); + if (xbuf->shm_info->shmid != -1) + shmctl(xbuf->shm_info->shmid, IPC_RMID, 0); + + xbuf->shm_info->shmaddr = (void *) -1; + xbuf->shm_info->shmid = -1; + } +} + +static boolean +ximage_surface_alloc_buffer(struct native_surface *nsurf, + enum native_attachment which) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + struct ximage_buffer *xbuf = &xsurf->buffers[which]; + struct pipe_screen *screen = xsurf->xdpy->base.screen; + struct pipe_texture templ; + + /* free old data */ + if (xbuf->texture) + ximage_surface_free_buffer(&xsurf->base, which); + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.format = xsurf->color_format; + templ.width0 = xsurf->width; + templ.height0 = xsurf->height; + templ.depth0 = 1; + templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; + + if (xbuf->shm_info) { + struct pipe_buffer *pbuf; + unsigned stride, size; + void *addr = NULL; + + stride = util_format_get_stride(xsurf->color_format, xsurf->width); + /* alignment should depend on visual? */ + stride = align(stride, 4); + size = stride * xsurf->height; + + /* create and attach shm object */ + xbuf->shm_info->shmid = shmget(IPC_PRIVATE, size, 0755); + if (xbuf->shm_info->shmid != -1) { + xbuf->shm_info->shmaddr = + shmat(xbuf->shm_info->shmid, NULL, 0); + if (xbuf->shm_info->shmaddr != (void *) -1) { + if (XShmAttach(xsurf->xdpy->dpy, xbuf->shm_info)) { + addr = xbuf->shm_info->shmaddr; + xbuf->xshm_attached = TRUE; + } + } + } + + if (addr) { + pbuf = screen->user_buffer_create(screen, addr, size); + if (pbuf) { + xbuf->texture = + screen->texture_blanket(screen, &templ, &stride, pbuf); + pipe_buffer_reference(&pbuf, NULL); + } + } + } + else { + xbuf->texture = screen->texture_create(screen, &templ); + } + + if (xbuf->texture) { + xbuf->transfer = screen->get_tex_transfer(screen, xbuf->texture, + 0, 0, 0, PIPE_TRANSFER_READ, 0, 0, xsurf->width, xsurf->height); + if (!xbuf->transfer) + pipe_texture_reference(&xbuf->texture, NULL); + } + + /* clean up the buffer if allocation failed */ + if (!xbuf->texture) + ximage_surface_free_buffer(&xsurf->base, which); + + return (xbuf->texture != NULL); +} + +static boolean +ximage_surface_draw_buffer(struct native_surface *nsurf, + enum native_attachment which) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + struct ximage_buffer *xbuf = &xsurf->buffers[which]; + struct pipe_screen *screen = xsurf->xdpy->base.screen; + + if (xsurf->type == XIMAGE_SURFACE_TYPE_PBUFFER) + return TRUE; + + assert(xsurf->drawable && xbuf->ximage && xbuf->texture); + + xbuf->ximage->data = screen->transfer_map(screen, xbuf->transfer); + + if (xbuf->shm_info) + XShmPutImage(xsurf->xdpy->dpy, xsurf->drawable, xsurf->gc, + xbuf->ximage, 0, 0, 0, 0, xsurf->width, xsurf->height, False); + else + XPutImage(xsurf->xdpy->dpy, xsurf->drawable, xsurf->gc, + xbuf->ximage, 0, 0, 0, 0, xsurf->width, xsurf->height); + + xbuf->ximage->data = NULL; + screen->transfer_unmap(screen, xbuf->transfer); + + XSync(xsurf->xdpy->dpy, FALSE); + + return TRUE; +} + +static boolean +ximage_surface_flush_frontbuffer(struct native_surface *nsurf) +{ + return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); +} + +static boolean +ximage_surface_swap_buffers(struct native_surface *nsurf) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + struct ximage_buffer *xfront, *xback, xtmp; + + xfront = &xsurf->buffers[NATIVE_ATTACHMENT_FRONT_LEFT]; + xback = &xsurf->buffers[NATIVE_ATTACHMENT_BACK_LEFT]; + + /* draw the back buffer directly if there is no front buffer */ + if (!xfront->texture) + return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_BACK_LEFT); + + /* swap the buffers */ + xtmp = *xfront; + *xfront = *xback; + *xback = xtmp; + + /* the front/back textures are swapped */ + xsurf->sequence_number++; + + return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); +} + +static void +ximage_surface_update_geometry(struct native_surface *nsurf) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + Status ok; + Window root; + int x, y; + unsigned int w, h, border, depth; + + /* pbuffer has fixed geometry */ + if (xsurf->type == XIMAGE_SURFACE_TYPE_PBUFFER) + return; + + ok = XGetGeometry(xsurf->xdpy->dpy, xsurf->drawable, + &root, &x, &y, &w, &h, &border, &depth); + if (ok) { + xsurf->width = w; + xsurf->height = h; + } +} + +static boolean +ximage_surface_validate(struct native_surface *nsurf, + const enum native_attachment *natts, + unsigned num_natts, + unsigned int *seq_num, + struct pipe_texture **textures, + int *width, int *height) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + boolean new_buffers = FALSE, error = FALSE; + unsigned i; + + ximage_surface_update_geometry(&xsurf->base); + + if (textures) + memset(textures, 0, sizeof(*textures) * num_natts); + + for (i = 0; i < num_natts; i++) { + enum native_attachment natt = natts[i]; + struct ximage_buffer *xbuf = &xsurf->buffers[natt]; + + if (!xbuf) + continue; + + /* reallocate the texture */ + if (!xbuf->texture || + xsurf->width != xbuf->texture->width0 || + xsurf->height != xbuf->texture->height0) { + new_buffers = TRUE; + if (ximage_surface_alloc_buffer(&xsurf->base, natt)) { + /* update ximage */ + if (xbuf->ximage) { + xbuf->ximage->width = xbuf->transfer->width; + xbuf->ximage->height = xbuf->transfer->height; + xbuf->ximage->bytes_per_line = xbuf->transfer->stride; + } + } + } + + /* allocation failed */ + if (!xbuf->texture) { + unsigned j; + for (j = 0; j < i; j++) + pipe_texture_reference(&textures[j], NULL); + for (j = i; j < num_natts; j++) + textures[j] = NULL; + error = TRUE; + break; + } + + if (textures) + pipe_texture_reference(&textures[i], xbuf->texture); + } + + /* increase the sequence number so that caller knows */ + if (new_buffers) + xsurf->sequence_number++; + + if (seq_num) + *seq_num = xsurf->sequence_number; + if (width) + *width = xsurf->width; + if (height) + *height = xsurf->height; + + return !error; +} + +static void +ximage_surface_wait(struct native_surface *nsurf) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + XSync(xsurf->xdpy->dpy, FALSE); + /* TODO XGetImage and update the front texture */ +} + +static void +ximage_surface_destroy(struct native_surface *nsurf) +{ + struct ximage_surface *xsurf = ximage_surface(nsurf); + int i; + + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { + struct ximage_buffer *xbuf = &xsurf->buffers[i]; + ximage_surface_free_buffer(&xsurf->base, i); + /* xbuf->shm_info is owned by xbuf->ximage? */ + if (xbuf->ximage) { + XDestroyImage(xbuf->ximage); + xbuf->ximage = NULL; + } + } + + if (xsurf->type != XIMAGE_SURFACE_TYPE_PBUFFER) + XFreeGC(xsurf->xdpy->dpy, xsurf->gc); + free(xsurf); +} + +static struct ximage_surface * +ximage_display_create_surface(struct native_display *ndpy, + enum ximage_surface_type type, + Drawable drawable, + const struct native_config *nconf) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + struct ximage_config *xconf = ximage_config(nconf); + struct ximage_surface *xsurf; + int i; + + xsurf = CALLOC_STRUCT(ximage_surface); + if (!xsurf) + return NULL; + + xsurf->xdpy = xdpy; + xsurf->type = type; + xsurf->color_format = xconf->base.color_format; + xsurf->drawable = drawable; + + if (xsurf->type != XIMAGE_SURFACE_TYPE_PBUFFER) { + xsurf->drawable = drawable; + xsurf->visual = *xconf->visual; + + xsurf->gc = XCreateGC(xdpy->dpy, xsurf->drawable, 0, NULL); + if (!xsurf->gc) { + free(xsurf); + return NULL; + } + + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) { + struct ximage_buffer *xbuf = &xsurf->buffers[i]; + + if (xdpy->use_xshm) { + xbuf->shm_info = calloc(1, sizeof(*xbuf->shm_info)); + if (xbuf->shm_info) { + /* initialize shm info */ + xbuf->shm_info->shmid = -1; + xbuf->shm_info->shmaddr = (void *) -1; + xbuf->shm_info->readOnly = TRUE; + + xbuf->ximage = XShmCreateImage(xsurf->xdpy->dpy, + xsurf->visual.visual, + xsurf->visual.depth, + ZPixmap, NULL, + xbuf->shm_info, + 0, 0); + } + } + else { + xbuf->ximage = XCreateImage(xsurf->xdpy->dpy, + xsurf->visual.visual, + xsurf->visual.depth, + ZPixmap, 0, /* format, offset */ + NULL, /* data */ + 0, 0, /* size */ + 8, /* bitmap_pad */ + 0); /* bytes_per_line */ + } + + if (!xbuf->ximage) { + XFreeGC(xdpy->dpy, xsurf->gc); + free(xsurf); + return NULL; + } + } + } + + xsurf->base.destroy = ximage_surface_destroy; + xsurf->base.swap_buffers = ximage_surface_swap_buffers; + xsurf->base.flush_frontbuffer = ximage_surface_flush_frontbuffer; + xsurf->base.validate = ximage_surface_validate; + xsurf->base.wait = ximage_surface_wait; + + return xsurf; +} + +static struct native_surface * +ximage_display_create_window_surface(struct native_display *ndpy, + EGLNativeWindowType win, + const struct native_config *nconf) +{ + struct ximage_surface *xsurf; + + xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_WINDOW, + (Drawable) win, nconf); + return (xsurf) ? &xsurf->base : NULL; +} + +static struct native_surface * +ximage_display_create_pixmap_surface(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct ximage_surface *xsurf; + + xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_PIXMAP, + (Drawable) pix, nconf); + return (xsurf) ? &xsurf->base : NULL; +} + +static struct native_surface * +ximage_display_create_pbuffer_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct ximage_surface *xsurf; + + xsurf = ximage_display_create_surface(ndpy, XIMAGE_SURFACE_TYPE_PBUFFER, + (Drawable) None, nconf); + if (xsurf) { + xsurf->width = width; + xsurf->height = height; + } + return (xsurf) ? &xsurf->base : NULL; +} + +static struct pipe_context * +ximage_display_create_context(struct native_display *ndpy, + void *context_private) +{ + struct pipe_context *pctx = softpipe_create(ndpy->screen); + if (pctx) + pctx->priv = context_private; + return pctx; +} + +static enum pipe_format +choose_format(const XVisualInfo *vinfo) +{ + enum pipe_format fmt; + /* TODO elaborate the formats */ + switch (vinfo->depth) { + case 32: + fmt = PIPE_FORMAT_A8R8G8B8_UNORM; + break; + case 24: + fmt = PIPE_FORMAT_X8R8G8B8_UNORM; + break; + case 16: + fmt = PIPE_FORMAT_R5G6B5_UNORM; + break; + default: + fmt = PIPE_FORMAT_NONE; + break; + } + + return fmt; +} + +static const struct native_config ** +ximage_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + const struct native_config **configs; + int i; + + /* first time */ + if (!xdpy->configs) { + const XVisualInfo *visuals; + int num_visuals, count, j; + + visuals = x11_screen_get_visuals(xdpy->xscr, &num_visuals); + if (!visuals) + return NULL; + + /* + * Create two configs for each visual. + * One with depth/stencil buffer; one without + */ + xdpy->configs = calloc(num_visuals * 2, sizeof(*xdpy->configs)); + if (!xdpy->configs) + return NULL; + + count = 0; + for (i = 0; i < num_visuals; i++) { + for (j = 0; j < 2; j++) { + struct ximage_config *xconf = &xdpy->configs[count]; + __GLcontextModes *mode = &xconf->base.mode; + + xconf->visual = &visuals[i]; + xconf->base.color_format = choose_format(xconf->visual); + if (xconf->base.color_format == PIPE_FORMAT_NONE) + continue; + + x11_screen_convert_visual(xdpy->xscr, xconf->visual, mode); + /* support double buffer mode */ + mode->doubleBufferMode = TRUE; + + xconf->base.depth_format = PIPE_FORMAT_NONE; + xconf->base.stencil_format = PIPE_FORMAT_NONE; + /* create the second config with depth/stencil buffer */ + if (j == 1) { + xconf->base.depth_format = PIPE_FORMAT_S8Z24_UNORM; + xconf->base.stencil_format = PIPE_FORMAT_S8Z24_UNORM; + mode->depthBits = 24; + mode->stencilBits = 8; + mode->haveDepthBuffer = TRUE; + mode->haveStencilBuffer = TRUE; + } + + mode->maxPbufferWidth = 4096; + mode->maxPbufferHeight = 4096; + mode->maxPbufferPixels = 4096 * 4096; + mode->drawableType = + GLX_WINDOW_BIT | GLX_PIXMAP_BIT | GLX_PBUFFER_BIT; + mode->swapMethod = GLX_SWAP_EXCHANGE_OML; + + if (mode->alphaBits) + mode->bindToTextureRgba = TRUE; + else + mode->bindToTextureRgb = TRUE; + + count++; + } + } + + xdpy->num_configs = count; + } + + configs = malloc(xdpy->num_configs * sizeof(*configs)); + if (configs) { + for (i = 0; i < xdpy->num_configs; i++) + configs[i] = (const struct native_config *) &xdpy->configs[i]; + if (num_configs) + *num_configs = xdpy->num_configs; + } + return configs; +} + +static void +ximage_display_destroy(struct native_display *ndpy) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + + if (xdpy->configs) + free(xdpy->configs); + + xdpy->base.screen->destroy(xdpy->base.screen); + free(xdpy->winsys); + + x11_screen_destroy(xdpy->xscr); + if (xdpy->own_dpy) + XCloseDisplay(xdpy->dpy); + free(xdpy); +} + +struct native_display * +x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm) +{ + struct ximage_display *xdpy; + + xdpy = CALLOC_STRUCT(ximage_display); + if (!xdpy) + return NULL; + + xdpy->dpy = dpy; + if (!xdpy->dpy) { + xdpy->dpy = XOpenDisplay(NULL); + if (!xdpy->dpy) { + free(xdpy); + return NULL; + } + xdpy->own_dpy = TRUE; + } + + xdpy->xscr_number = DefaultScreen(xdpy->dpy); + xdpy->xscr = x11_screen_create(xdpy->dpy, xdpy->xscr_number); + if (!xdpy->xscr) { + free(xdpy); + return NULL; + } + + xdpy->use_xshm = + (use_xshm && x11_screen_support(xdpy->xscr, X11_SCREEN_EXTENSION_XSHM)); + + xdpy->winsys = create_sw_winsys(); + xdpy->base.screen = softpipe_create_screen(xdpy->winsys); + + xdpy->base.destroy = ximage_display_destroy; + + xdpy->base.get_configs = ximage_display_get_configs; + xdpy->base.create_context = ximage_display_create_context; + xdpy->base.create_window_surface = ximage_display_create_window_surface; + xdpy->base.create_pixmap_surface = ximage_display_create_pixmap_surface; + xdpy->base.create_pbuffer_surface = ximage_display_create_pbuffer_surface; + + return &xdpy->base; +} diff --git a/src/gallium/state_trackers/egl_g3d/x11/sw_winsys.c b/src/gallium/state_trackers/egl_g3d/x11/sw_winsys.c new file mode 100644 index 00000000000..6ee3ede38cb --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/sw_winsys.c @@ -0,0 +1,231 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Totally software-based winsys layer. + * Note that the one winsys function that we can't implement here + * is flush_frontbuffer(). + * Whoever uses this code will have to provide that. + * + * Authors: Brian Paul + */ + + +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "sw_winsys.h" + + + +/** Subclass of pipe_winsys */ +struct sw_pipe_winsys +{ + struct pipe_winsys Base; + /* no extra fields for now */ +}; + + +/** subclass of pipe_buffer */ +struct sw_pipe_buffer +{ + struct pipe_buffer Base; + boolean UserBuffer; /** Is this a user-space buffer? */ + void *Data; + void *Mapped; +}; + + +/** cast wrapper */ +static INLINE struct sw_pipe_buffer * +sw_pipe_buffer(struct pipe_buffer *b) +{ + return (struct sw_pipe_buffer *) b; +} + + +static const char * +get_name(struct pipe_winsys *pws) +{ + return "software"; +} + + +/** Create new pipe_buffer and allocate storage of given size */ +static struct pipe_buffer * +buffer_create(struct pipe_winsys *pws, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct sw_pipe_buffer *buffer = CALLOC_STRUCT(sw_pipe_buffer); + if (!buffer) + return NULL; + + pipe_reference_init(&buffer->Base.reference, 1); + buffer->Base.alignment = alignment; + buffer->Base.usage = usage; + buffer->Base.size = size; + + /* align to 16-byte multiple for Cell */ + buffer->Data = align_malloc(size, MAX2(alignment, 16)); + + return &buffer->Base; +} + + +/** + * Create buffer which wraps user-space data. + */ +static struct pipe_buffer * +user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) +{ + struct sw_pipe_buffer *buffer = CALLOC_STRUCT(sw_pipe_buffer); + if (!buffer) + return NULL; + + pipe_reference_init(&buffer->Base.reference, 1); + buffer->Base.size = bytes; + buffer->UserBuffer = TRUE; + buffer->Data = ptr; + + return &buffer->Base; +} + + +static void * +buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buf, unsigned flags) +{ + struct sw_pipe_buffer *buffer = sw_pipe_buffer(buf); + buffer->Mapped = buffer->Data; + return buffer->Mapped; +} + + +static void +buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) +{ + struct sw_pipe_buffer *buffer = sw_pipe_buffer(buf); + buffer->Mapped = NULL; +} + + +static void +buffer_destroy(struct pipe_buffer *buf) +{ + struct sw_pipe_buffer *buffer = sw_pipe_buffer(buf); + + if (buffer->Data && !buffer->UserBuffer) { + align_free(buffer->Data); + buffer->Data = NULL; + } + + free(buffer); +} + + +static struct pipe_buffer * +surface_buffer_create(struct pipe_winsys *winsys, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned tex_usage, + unsigned *stride) +{ + const unsigned alignment = 64; + unsigned nblocksy; + + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); + + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); +} + + +static void +fence_reference(struct pipe_winsys *sws, struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + /* no-op */ +} + + +static int +fence_signalled(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + /* no-op */ + return 0; +} + + +static int +fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + /* no-op */ + return 0; +} + + +/** + * Create/return a new pipe_winsys object. + */ +struct pipe_winsys * +create_sw_winsys(void) +{ + struct sw_pipe_winsys *ws = CALLOC_STRUCT(sw_pipe_winsys); + if (!ws) + return NULL; + + /* Fill in this struct with callbacks that pipe will need to + * communicate with the window system, buffer manager, etc. + */ + ws->Base.buffer_create = buffer_create; + ws->Base.user_buffer_create = user_buffer_create; + ws->Base.buffer_map = buffer_map; + ws->Base.buffer_unmap = buffer_unmap; + ws->Base.buffer_destroy = buffer_destroy; + + ws->Base.surface_buffer_create = surface_buffer_create; + + ws->Base.fence_reference = fence_reference; + ws->Base.fence_signalled = fence_signalled; + ws->Base.fence_finish = fence_finish; + + ws->Base.flush_frontbuffer = NULL; /* not implemented here! */ + + ws->Base.get_name = get_name; + + return &ws->Base; +} diff --git a/src/gallium/state_trackers/egl_g3d/x11/sw_winsys.h b/src/gallium/state_trackers/egl_g3d/x11/sw_winsys.h new file mode 100644 index 00000000000..f96c5a14b0a --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/sw_winsys.h @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SW_WINSYS_H +#define SW_WINSYS_H + + +struct pipe_winsys; + + +extern struct pipe_winsys * +create_sw_winsys(void); + + +#endif /* SW_WINSYS_H */ diff --git a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c new file mode 100644 index 00000000000..1e98943242a --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c @@ -0,0 +1,402 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <X11/Xlibint.h> +#include <X11/extensions/XShm.h> +#include "util/u_memory.h" +#include "util/u_math.h" +#include "xf86drm.h" +#include "egllog.h" + +#include "x11_screen.h" +#include "dri2.h" +#include "glxinit.h" + +struct x11_screen { + Display *dpy; + int number; + + __GLXdisplayPrivate *glx_dpy; + + int dri_major, dri_minor; + char *dri_driver; + char *dri_device; + int dri_fd; + + XVisualInfo *visuals; + int num_visuals; +}; + + +/** + * Create a X11 screen. + */ +struct x11_screen * +x11_screen_create(Display *dpy, int screen) +{ + struct x11_screen *xscr; + + if (screen >= ScreenCount(dpy)) + return NULL; + + xscr = CALLOC_STRUCT(x11_screen); + if (xscr) { + xscr->dpy = dpy; + xscr->number = screen; + + xscr->dri_major = -1; + xscr->dri_fd = -1; + } + return xscr; +} + +/** + * Destroy a X11 screen. + */ +void +x11_screen_destroy(struct x11_screen *xscr) +{ + if (xscr->dri_fd >= 0) + close(xscr->dri_fd); + if (xscr->dri_driver) + Xfree(xscr->dri_driver); + if (xscr->dri_device) + Xfree(xscr->dri_device); + + if (xscr->glx_dpy) + __glXRelease(xscr->glx_dpy); + if (xscr->visuals) + XFree(xscr->visuals); + free(xscr); +} + +static boolean +x11_screen_init_dri2(struct x11_screen *xscr) +{ + if (xscr->dri_major < 0) { + int eventBase, errorBase; + + if (!DRI2QueryExtension(xscr->dpy, &eventBase, &errorBase) || + !DRI2QueryVersion(xscr->dpy, &xscr->dri_major, &xscr->dri_minor)) + xscr->dri_major = -1; + } + return (xscr->dri_major >= 0); +} + +static boolean +x11_screen_init_glx(struct x11_screen *xscr) +{ + if (!xscr->glx_dpy) + xscr->glx_dpy = __glXInitialize(xscr->dpy); + return (xscr->glx_dpy != NULL); +} + +/** + * Return true if the screen supports the extension. + */ +boolean +x11_screen_support(struct x11_screen *xscr, enum x11_screen_extension ext) +{ + boolean supported = FALSE; + + switch (ext) { + case X11_SCREEN_EXTENSION_XSHM: + supported = XShmQueryExtension(xscr->dpy); + break; + case X11_SCREEN_EXTENSION_GLX: + supported = x11_screen_init_glx(xscr); + break; + case X11_SCREEN_EXTENSION_DRI2: + supported = x11_screen_init_dri2(xscr); + break; + default: + break; + } + + return supported; +} + +/** + * Return the X visuals. + */ +const XVisualInfo * +x11_screen_get_visuals(struct x11_screen *xscr, int *num_visuals) +{ + if (!xscr->visuals) { + XVisualInfo vinfo_template; + vinfo_template.screen = xscr->number; + xscr->visuals = XGetVisualInfo(xscr->dpy, VisualScreenMask, + &vinfo_template, &xscr->num_visuals); + } + + if (num_visuals) + *num_visuals = xscr->num_visuals; + return xscr->visuals; +} + +void +x11_screen_convert_visual(struct x11_screen *xscr, const XVisualInfo *visual, + __GLcontextModes *mode) +{ + int r, g, b, a; + int visual_type; + + r = util_bitcount(visual->red_mask); + g = util_bitcount(visual->green_mask); + b = util_bitcount(visual->blue_mask); + a = visual->depth - (r + g + b); +#if defined(__cplusplus) || defined(c_plusplus) + visual_type = visual->c_class; +#else + visual_type = visual->class; +#endif + + /* convert to GLX visual type */ + switch (visual_type) { + case TrueColor: + visual_type = GLX_TRUE_COLOR; + break; + case DirectColor: + visual_type = GLX_DIRECT_COLOR; + break; + case PseudoColor: + visual_type = GLX_PSEUDO_COLOR; + break; + case StaticColor: + visual_type = GLX_STATIC_COLOR; + break; + case GrayScale: + visual_type = GLX_GRAY_SCALE; + break; + case StaticGray: + visual_type = GLX_STATIC_GRAY; + break; + default: + visual_type = GLX_NONE; + break; + } + + mode->rgbBits = r + g + b + a; + mode->redBits = r; + mode->greenBits = g; + mode->blueBits = b; + mode->alphaBits = a; + mode->visualID = visual->visualid; + mode->visualType = visual_type; + + /* sane defaults */ + mode->renderType = GLX_RGBA_BIT; + mode->rgbMode = TRUE; + mode->visualRating = GLX_SLOW_CONFIG; + mode->xRenderable = TRUE; +} + +/** + * Return the GLX fbconfigs. + */ +const __GLcontextModes * +x11_screen_get_glx_configs(struct x11_screen *xscr) +{ + return (x11_screen_init_glx(xscr)) + ? xscr->glx_dpy->screenConfigs[xscr->number].configs + : NULL; +} + +/** + * Return the GLX visuals. + */ +const __GLcontextModes * +x11_screen_get_glx_visuals(struct x11_screen *xscr) +{ + return (x11_screen_init_glx(xscr)) + ? xscr->glx_dpy->screenConfigs[xscr->number].visuals + : NULL; +} + +static boolean +x11_screen_is_driver_equal(struct x11_screen *xscr, const char *driver) +{ + return (strcmp(xscr->dri_driver, driver) == 0); +} + +/** + * Enable DRI2 and returns the file descriptor of the DRM device. The file + * descriptor will be closed automatically when the screen is destoryed. + */ +int +x11_screen_enable_dri2(struct x11_screen *xscr, const char *driver) +{ + if (xscr->dri_fd < 0) { + int fd; + drm_magic_t magic; + + /* get the driver name and the device name first */ + if (!xscr->dri_driver) { + if (!DRI2Connect(xscr->dpy, RootWindow(xscr->dpy, xscr->number), + &xscr->dri_driver, &xscr->dri_device)) { + xscr->dri_driver = xscr->dri_device = NULL; + return -1; + } + } + + if (!x11_screen_is_driver_equal(xscr, driver)) { + _eglLog(_EGL_WARNING, "Driver mismatch: %s != %s", + xscr->dri_driver, driver); + return -1; + } + + fd = open(xscr->dri_device, O_RDWR); + if (fd < 0) { + _eglLog(_EGL_WARNING, "failed to open %s", xscr->dri_device); + return -1; + } + + memset(&magic, 0, sizeof(magic)); + if (drmGetMagic(fd, &magic)) { + _eglLog(_EGL_WARNING, "failed to get magic"); + close(fd); + return -1; + } + + if (!DRI2Authenticate(xscr->dpy, + RootWindow(xscr->dpy, xscr->number), magic)) { + _eglLog(_EGL_WARNING, "failed to authenticate magic"); + close(fd); + return -1; + } + + xscr->dri_fd = fd; + } + + return xscr->dri_fd; +} + +/** + * Create/Destroy the DRI drawable. + */ +void +x11_drawable_enable_dri2(struct x11_screen *xscr, + Drawable drawable, boolean on) +{ + if (on) + DRI2CreateDrawable(xscr->dpy, drawable); + else + DRI2DestroyDrawable(xscr->dpy, drawable); +} + +/** + * Copy between buffers of the DRI2 drawable. + */ +void +x11_drawable_copy_buffers(struct x11_screen *xscr, Drawable drawable, + int x, int y, int width, int height, + int src_buf, int dst_buf) +{ + XRectangle rect; + XserverRegion region; + + rect.x = x; + rect.y = y; + rect.width = width; + rect.height = height; + + region = XFixesCreateRegion(xscr->dpy, &rect, 1); + DRI2CopyRegion(xscr->dpy, drawable, region, dst_buf, src_buf); + XFixesDestroyRegion(xscr->dpy, region); +} + +/** + * Get the buffers of the DRI2 drawable. The returned array should be freed. + */ +struct x11_drawable_buffer * +x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, + int *width, int *height, unsigned int *attachments, + boolean with_format, int num_ins, int *num_outs) +{ + DRI2Buffer *dri2bufs; + + if (with_format) + dri2bufs = DRI2GetBuffersWithFormat(xscr->dpy, drawable, width, height, + attachments, num_ins, num_outs); + else + dri2bufs = DRI2GetBuffers(xscr->dpy, drawable, width, height, + attachments, num_ins, num_outs); + + return (struct x11_drawable_buffer *) dri2bufs; +} + +/** + * Create a mode list of the given size. + */ +__GLcontextModes * +x11_context_modes_create(unsigned count) +{ + const size_t size = sizeof(__GLcontextModes); + __GLcontextModes *base = NULL; + __GLcontextModes **next; + unsigned i; + + next = &base; + for (i = 0; i < count; i++) { + *next = (__GLcontextModes *) calloc(1, size); + if (*next == NULL) { + x11_context_modes_destroy(base); + base = NULL; + break; + } + next = &((*next)->next); + } + + return base; +} + +/** + * Destroy a mode list. + */ +void +x11_context_modes_destroy(__GLcontextModes *modes) +{ + while (modes != NULL) { + __GLcontextModes *next = modes->next; + free(modes); + modes = next; + } +} + +/** + * Return the number of the modes in the mode list. + */ +unsigned +x11_context_modes_count(const __GLcontextModes *modes) +{ + const __GLcontextModes *mode; + int count = 0; + for (mode = modes; mode; mode = mode->next) + count++; + return count; +} diff --git a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h new file mode 100644 index 00000000000..86e8e0501a3 --- /dev/null +++ b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h @@ -0,0 +1,99 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2009-2010 Chia-I Wu <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _X11_SCREEN_H_ +#define _X11_SCREEN_H_ + +#include <X11/Xlib.h> +#include <X11/Xutil.h> +#include <X11/extensions/dri2tokens.h> +#include "pipe/p_compiler.h" +#include "common/native.h" + +enum x11_screen_extension { + X11_SCREEN_EXTENSION_XSHM, + X11_SCREEN_EXTENSION_GLX, + X11_SCREEN_EXTENSION_DRI2, +}; + +/* the same as DRI2Buffer */ +struct x11_drawable_buffer { + unsigned int attachment; + unsigned int name; + unsigned int pitch; + unsigned int cpp; + unsigned int flags; +}; + +struct x11_screen; + +struct x11_screen * +x11_screen_create(Display *dpy, int screen); + +void +x11_screen_destroy(struct x11_screen *xscr); + +boolean +x11_screen_support(struct x11_screen *xscr, enum x11_screen_extension ext); + +const XVisualInfo * +x11_screen_get_visuals(struct x11_screen *xscr, int *num_visuals); + +void +x11_screen_convert_visual(struct x11_screen *xscr, const XVisualInfo *visual, + __GLcontextModes *mode); + +const __GLcontextModes * +x11_screen_get_glx_configs(struct x11_screen *xscr); + +const __GLcontextModes * +x11_screen_get_glx_visuals(struct x11_screen *xscr); + +int +x11_screen_enable_dri2(struct x11_screen *xscr, const char *driver); + +__GLcontextModes * +x11_context_modes_create(unsigned count); + +void +x11_context_modes_destroy(__GLcontextModes *modes); + +unsigned +x11_context_modes_count(const __GLcontextModes *modes); + +void +x11_drawable_enable_dri2(struct x11_screen *xscr, + Drawable drawable, boolean on); + +void +x11_drawable_copy_buffers(struct x11_screen *xscr, Drawable drawable, + int x, int y, int width, int height, + int src_buf, int dst_buf); + +struct x11_drawable_buffer * +x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, + int *width, int *height, unsigned int *attachments, + boolean with_format, int num_ins, int *num_outs); + +#endif /* _X11_SCREEN_H_ */ diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index f2881b9a31e..3caf56e924b 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -644,6 +644,7 @@ register_with_display(Display *dpy) XExtCodes *c = XAddExtension(dpy); ext = dpy->ext_procs; /* new extension is at head of list */ assert(c->extension == ext->codes.extension); + (void) c; ext->name = _mesa_strdup(extName); ext->close_display = close_display_callback; } @@ -1006,7 +1007,7 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) } -XVisualInfo * +PUBLIC XVisualInfo * glXChooseVisual( Display *dpy, int screen, int *list ) { XMesaVisual xmvis; @@ -1028,7 +1029,7 @@ glXChooseVisual( Display *dpy, int screen, int *list ) } -GLXContext +PUBLIC GLXContext glXCreateContext( Display *dpy, XVisualInfo *visinfo, GLXContext share_list, Bool direct ) { @@ -1083,7 +1084,7 @@ static XMesaBuffer MakeCurrent_PrevReadBuffer = 0; /* GLX 1.3 and later */ -Bool +PUBLIC Bool glXMakeContextCurrent( Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx ) { @@ -1179,21 +1180,21 @@ glXMakeContextCurrent( Display *dpy, GLXDrawable draw, } -Bool +PUBLIC Bool glXMakeCurrent( Display *dpy, GLXDrawable drawable, GLXContext ctx ) { return glXMakeContextCurrent( dpy, drawable, drawable, ctx ); } -GLXContext +PUBLIC GLXContext glXGetCurrentContext(void) { return GetCurrentContext(); } -Display * +PUBLIC Display * glXGetCurrentDisplay(void) { GLXContext glxCtx = glXGetCurrentContext(); @@ -1202,14 +1203,14 @@ glXGetCurrentDisplay(void) } -Display * +PUBLIC Display * glXGetCurrentDisplayEXT(void) { return glXGetCurrentDisplay(); } -GLXDrawable +PUBLIC GLXDrawable glXGetCurrentDrawable(void) { GLXContext gc = glXGetCurrentContext(); @@ -1217,7 +1218,7 @@ glXGetCurrentDrawable(void) } -GLXDrawable +PUBLIC GLXDrawable glXGetCurrentReadDrawable(void) { GLXContext gc = glXGetCurrentContext(); @@ -1225,14 +1226,14 @@ glXGetCurrentReadDrawable(void) } -GLXDrawable +PUBLIC GLXDrawable glXGetCurrentReadDrawableSGI(void) { return glXGetCurrentReadDrawable(); } -GLXPixmap +PUBLIC GLXPixmap glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap ) { XMesaVisual v; @@ -1257,7 +1258,7 @@ glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap ) /*** GLX_MESA_pixmap_colormap ***/ -GLXPixmap +PUBLIC GLXPixmap glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colormap cmap ) { @@ -1281,7 +1282,7 @@ glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo, } -void +PUBLIC void glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap ) { XMesaBuffer b = XMesaFindBuffer(dpy, pixmap); @@ -1294,7 +1295,7 @@ glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap ) } -void +PUBLIC void glXCopyContext( Display *dpy, GLXContext src, GLXContext dst, unsigned long mask ) { @@ -1308,7 +1309,7 @@ glXCopyContext( Display *dpy, GLXContext src, GLXContext dst, } -Bool +PUBLIC Bool glXQueryExtension( Display *dpy, int *errorBase, int *eventBase ) { int op, ev, err; @@ -1323,7 +1324,7 @@ glXQueryExtension( Display *dpy, int *errorBase, int *eventBase ) } -void +PUBLIC void glXDestroyContext( Display *dpy, GLXContext ctx ) { GLXContext glxCtx = ctx; @@ -1339,7 +1340,7 @@ glXDestroyContext( Display *dpy, GLXContext ctx ) } -Bool +PUBLIC Bool glXIsDirect( Display *dpy, GLXContext ctx ) { GLXContext glxCtx = ctx; @@ -1349,7 +1350,7 @@ glXIsDirect( Display *dpy, GLXContext ctx ) -void +PUBLIC void glXSwapBuffers( Display *dpy, GLXDrawable drawable ) { XMesaBuffer buffer = XMesaFindBuffer( dpy, drawable ); @@ -1376,7 +1377,7 @@ glXSwapBuffers( Display *dpy, GLXDrawable drawable ) /*** GLX_MESA_copy_sub_buffer ***/ -void +PUBLIC void glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable, int x, int y, int width, int height ) { @@ -1390,7 +1391,7 @@ glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable, } -Bool +PUBLIC Bool glXQueryVersion( Display *dpy, int *maj, int *min ) { (void) dpy; @@ -1607,7 +1608,7 @@ get_config( XMesaVisual xmvis, int attrib, int *value, GLboolean fbconfig ) } -int +PUBLIC int glXGetConfig( Display *dpy, XVisualInfo *visinfo, int attrib, int *value ) { @@ -1637,7 +1638,7 @@ glXGetConfig( Display *dpy, XVisualInfo *visinfo, } -void +PUBLIC void glXWaitGL( void ) { XMesaContext xmesa = XMesaGetCurrentContext(); @@ -1646,7 +1647,7 @@ glXWaitGL( void ) -void +PUBLIC void glXWaitX( void ) { XMesaContext xmesa = XMesaGetCurrentContext(); @@ -1663,7 +1664,7 @@ get_extensions( void ) /* GLX 1.1 and later */ -const char * +PUBLIC const char * glXQueryExtensionsString( Display *dpy, int screen ) { (void) dpy; @@ -1674,7 +1675,7 @@ glXQueryExtensionsString( Display *dpy, int screen ) /* GLX 1.1 and later */ -const char * +PUBLIC const char * glXQueryServerString( Display *dpy, int screen, int name ) { static char version[1000]; @@ -1699,7 +1700,7 @@ glXQueryServerString( Display *dpy, int screen, int name ) /* GLX 1.1 and later */ -const char * +PUBLIC const char * glXGetClientString( Display *dpy, int name ) { static char version[1000]; @@ -1727,7 +1728,7 @@ glXGetClientString( Display *dpy, int name ) */ -int +PUBLIC int glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config, int attribute, int *value ) { @@ -1742,7 +1743,7 @@ glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config, } -GLXFBConfig * +PUBLIC GLXFBConfig * glXGetFBConfigs( Display *dpy, int screen, int *nelements ) { XVisualInfo *visuals, visTemplate; @@ -1768,7 +1769,7 @@ glXGetFBConfigs( Display *dpy, int screen, int *nelements ) } -GLXFBConfig * +PUBLIC GLXFBConfig * glXChooseFBConfig( Display *dpy, int screen, const int *attribList, int *nitems ) { @@ -1797,7 +1798,7 @@ glXChooseFBConfig( Display *dpy, int screen, } -XVisualInfo * +PUBLIC XVisualInfo * glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config ) { if (dpy && config) { @@ -1819,7 +1820,7 @@ glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config ) } -GLXWindow +PUBLIC GLXWindow glXCreateWindow( Display *dpy, GLXFBConfig config, Window win, const int *attribList ) { @@ -1839,7 +1840,7 @@ glXCreateWindow( Display *dpy, GLXFBConfig config, Window win, } -void +PUBLIC void glXDestroyWindow( Display *dpy, GLXWindow window ) { XMesaBuffer b = XMesaFindBuffer(dpy, (Drawable) window); @@ -1850,7 +1851,7 @@ glXDestroyWindow( Display *dpy, GLXWindow window ) /* XXX untested */ -GLXPixmap +PUBLIC GLXPixmap glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap, const int *attribList ) { @@ -1960,7 +1961,7 @@ glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap, } -void +PUBLIC void glXDestroyPixmap( Display *dpy, GLXPixmap pixmap ) { XMesaBuffer b = XMesaFindBuffer(dpy, (Drawable)pixmap); @@ -1970,7 +1971,7 @@ glXDestroyPixmap( Display *dpy, GLXPixmap pixmap ) } -GLXPbuffer +PUBLIC GLXPbuffer glXCreatePbuffer( Display *dpy, GLXFBConfig config, const int *attribList ) { @@ -2033,7 +2034,7 @@ glXCreatePbuffer( Display *dpy, GLXFBConfig config, } -void +PUBLIC void glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf ) { XMesaBuffer b = XMesaFindBuffer(dpy, pbuf); @@ -2043,7 +2044,7 @@ glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf ) } -void +PUBLIC void glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute, unsigned int *value ) { @@ -2089,7 +2090,7 @@ glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute, } -GLXContext +PUBLIC GLXContext glXCreateNewContext( Display *dpy, GLXFBConfig config, int renderType, GLXContext shareList, Bool direct ) { @@ -2123,7 +2124,7 @@ glXCreateNewContext( Display *dpy, GLXFBConfig config, } -int +PUBLIC int glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value ) { GLXContext glxCtx = ctx; @@ -2152,7 +2153,7 @@ glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value ) } -void +PUBLIC void glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask ) { XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable); @@ -2161,7 +2162,7 @@ glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask ) } -void +PUBLIC void glXGetSelectedEvent( Display *dpy, GLXDrawable drawable, unsigned long *mask ) { @@ -2176,7 +2177,7 @@ glXGetSelectedEvent( Display *dpy, GLXDrawable drawable, /*** GLX_SGI_swap_control ***/ -int +PUBLIC int glXSwapIntervalSGI(int interval) { (void) interval; @@ -2189,7 +2190,7 @@ glXSwapIntervalSGI(int interval) static unsigned int FrameCounter = 0; -int +PUBLIC int glXGetVideoSyncSGI(unsigned int *count) { /* this is a bogus implementation */ @@ -2197,7 +2198,7 @@ glXGetVideoSyncSGI(unsigned int *count) return 0; } -int +PUBLIC int glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count) { if (divisor <= 0 || remainder < 0) @@ -2214,7 +2215,7 @@ glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count) /*** GLX_SGI_make_current_read ***/ -Bool +PUBLIC Bool glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx) { return glXMakeContextCurrent( dpy, draw, read, ctx ); @@ -2232,7 +2233,7 @@ glXGetCurrentReadDrawableSGI(void) /*** GLX_SGIX_video_source ***/ #if defined(_VL_H) -GLXVideoSourceSGIX +PUBLIC GLXVideoSourceSGIX glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath path, int nodeClass, VLNode drainNode) { (void) dpy; @@ -2244,7 +2245,7 @@ glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath pa return 0; } -void +PUBLIC void glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src) { (void) dpy; @@ -2256,21 +2257,21 @@ glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src) /*** GLX_EXT_import_context ***/ -void +PUBLIC void glXFreeContextEXT(Display *dpy, GLXContext context) { (void) dpy; (void) context; } -GLXContextID +PUBLIC GLXContextID glXGetContextIDEXT(const GLXContext context) { (void) context; return 0; } -GLXContext +PUBLIC GLXContext glXImportContextEXT(Display *dpy, GLXContextID contextID) { (void) dpy; @@ -2278,7 +2279,7 @@ glXImportContextEXT(Display *dpy, GLXContextID contextID) return 0; } -int +PUBLIC int glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *value) { (void) dpy; @@ -2292,20 +2293,20 @@ glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *val /*** GLX_SGIX_fbconfig ***/ -int +PUBLIC int glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value) { return glXGetFBConfigAttrib(dpy, config, attribute, value); } -GLXFBConfigSGIX * +PUBLIC GLXFBConfigSGIX * glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int *nelements) { return (GLXFBConfig *) glXChooseFBConfig(dpy, screen, attrib_list, nelements); } -GLXPixmap +PUBLIC GLXPixmap glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap) { XMesaVisual xmvis = (XMesaVisual) config; @@ -2314,7 +2315,7 @@ glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pi } -GLXContext +PUBLIC GLXContext glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct) { XMesaVisual xmvis = (XMesaVisual) config; @@ -2343,14 +2344,14 @@ glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_ } -XVisualInfo * +PUBLIC XVisualInfo * glXGetVisualFromFBConfigSGIX(Display *dpy, GLXFBConfigSGIX config) { return glXGetVisualFromFBConfig(dpy, config); } -GLXFBConfigSGIX +PUBLIC GLXFBConfigSGIX glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis) { XMesaVisual xmvis = find_glx_visual(dpy, vis); @@ -2366,7 +2367,7 @@ glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis) /*** GLX_SGIX_pbuffer ***/ -GLXPbufferSGIX +PUBLIC GLXPbufferSGIX glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, unsigned int width, unsigned int height, int *attribList) @@ -2405,7 +2406,7 @@ glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, } -void +PUBLIC void glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf) { XMesaBuffer xmbuf = XMesaFindBuffer(dpy, pbuf); @@ -2415,7 +2416,7 @@ glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf) } -int +PUBLIC int glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value) { const XMesaBuffer xmbuf = XMesaFindBuffer(dpy, pbuf); @@ -2448,7 +2449,7 @@ glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigne } -void +PUBLIC void glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask) { XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable); @@ -2459,7 +2460,7 @@ glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask) } -void +PUBLIC void glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask) { XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable); @@ -2475,7 +2476,7 @@ glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask) /*** GLX_SGI_cushion ***/ -void +PUBLIC void glXCushionSGI(Display *dpy, Window win, float cushion) { (void) dpy; @@ -2487,7 +2488,7 @@ glXCushionSGI(Display *dpy, Window win, float cushion) /*** GLX_SGIX_video_resize ***/ -int +PUBLIC int glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window) { (void) dpy; @@ -2497,7 +2498,7 @@ glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window return 0; } -int +PUBLIC int glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h) { (void) dpy; @@ -2510,7 +2511,7 @@ glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, i return 0; } -int +PUBLIC int glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h) { (void) dpy; @@ -2523,7 +2524,7 @@ glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, i return 0; } -int +PUBLIC int glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh) { (void) dpy; @@ -2536,7 +2537,7 @@ glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *d return 0; } -int +PUBLIC int glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype) { (void) dpy; @@ -2551,7 +2552,7 @@ glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype) /*** GLX_SGIX_dmbuffer **/ #if defined(_DM_BUFFER_H_) -Bool +PUBLIC Bool glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer) { (void) dpy; @@ -2565,7 +2566,7 @@ glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params /*** GLX_SGIX_swap_group ***/ -void +PUBLIC void glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member) { (void) dpy; @@ -2577,7 +2578,7 @@ glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member) /*** GLX_SGIX_swap_barrier ***/ -void +PUBLIC void glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier) { (void) dpy; @@ -2585,7 +2586,7 @@ glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier) (void) barrier; } -Bool +PUBLIC Bool glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max) { (void) dpy; @@ -2598,7 +2599,7 @@ glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max) /*** GLX_SUN_get_transparent_index ***/ -Status +PUBLIC Status glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent) { (void) dpy; @@ -2616,7 +2617,7 @@ glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *p * Release the depth, stencil, accum buffers attached to a GLXDrawable * (a window or pixmap) prior to destroying the GLXDrawable. */ -Bool +PUBLIC Bool glXReleaseBuffersMESA( Display *dpy, GLXDrawable d ) { XMesaBuffer b = XMesaFindBuffer(dpy, d); @@ -2629,7 +2630,7 @@ glXReleaseBuffersMESA( Display *dpy, GLXDrawable d ) /*** GLX_EXT_texture_from_pixmap ***/ -void +PUBLIC void glXBindTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer, const int *attrib_list) { @@ -2638,7 +2639,7 @@ glXBindTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer, XMesaBindTexImage(dpy, b, buffer, attrib_list); } -void +PUBLIC void glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer) { XMesaBuffer b = XMesaFindBuffer(dpy, drawable); diff --git a/src/gallium/state_trackers/glx/xlib/glx_getproc.c b/src/gallium/state_trackers/glx/xlib/glx_getproc.c index ca7d88c9227..84d47b12edc 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_getproc.c +++ b/src/gallium/state_trackers/glx/xlib/glx_getproc.c @@ -34,6 +34,7 @@ #include <string.h> #include "GL/glx.h" #include "glapi/glapi.h" +#include "pipe/p_compiler.h" struct name_address_pair { @@ -208,6 +209,7 @@ glXGetProcAddressARB(const GLubyte *procName) /* GLX 1.4 */ +PUBLIC void (*glXGetProcAddress(const GLubyte *procName))() { return glXGetProcAddressARB(procName); diff --git a/src/gallium/state_trackers/glx/xlib/glx_usefont.c b/src/gallium/state_trackers/glx/xlib/glx_usefont.c index acc64df62b6..16e5ce642f3 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_usefont.c +++ b/src/gallium/state_trackers/glx/xlib/glx_usefont.c @@ -33,6 +33,7 @@ #include "main/context.h" #include "main/imports.h" #include <GL/glx.h> +#include "pipe/p_compiler.h" /* Some debugging info. */ @@ -210,7 +211,7 @@ isvalid(XFontStruct * fs, unsigned int which) } -void +PUBLIC void glXUseXFont(Font font, int first, int count, int listbase) { Display *dpy; diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index c76dfb31d2b..1783bc504d9 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -67,6 +67,10 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" +#include "trace/tr_screen.h" +#include "trace/tr_context.h" +#include "trace/tr_texture.h" + #include "xm_winsys.h" #include <GL/glx.h> @@ -87,6 +91,8 @@ void xmesa_set_driver( const struct xm_driver *templ ) */ pipe_mutex _xmesa_lock; +static struct pipe_screen *_screen = NULL; +static struct pipe_screen *screen = NULL; /**********************************************************************/ @@ -754,7 +760,7 @@ PUBLIC XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) { static GLboolean firstTime = GL_TRUE; - static struct pipe_screen *screen = NULL; + struct pipe_context *_pipe = NULL; struct pipe_context *pipe = NULL; XMesaContext c; GLcontext *mesaCtx; @@ -762,7 +768,8 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (firstTime) { pipe_mutex_init(_xmesa_lock); - screen = driver.create_pipe_screen(); + _screen = driver.create_pipe_screen(); + screen = trace_screen_create( _screen ); firstTime = GL_FALSE; } @@ -781,9 +788,11 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (screen == NULL) goto fail; - pipe = driver.create_pipe_context(screen, (void *) c); - if (pipe == NULL) + _pipe = driver.create_pipe_context(_screen, (void *) c); + if (_pipe == NULL) goto fail; + pipe = trace_context_create(screen, _pipe); + pipe->priv = c; c->st = st_create_context(pipe, &v->mesa_visual, @@ -1110,6 +1119,12 @@ void XMesaSwapBuffers( XMesaBuffer b ) st_swapbuffers(b->stfb, &frontLeftSurf, NULL); if (frontLeftSurf) { + if (_screen != screen) { + struct trace_surface *tr_surf = trace_surface( frontLeftSurf ); + struct pipe_surface *surf = tr_surf->surface; + frontLeftSurf = surf; + } + driver.display_surface(b, frontLeftSurf); } diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript index ec385e7c447..8498a90812e 100644 --- a/src/gallium/state_trackers/python/SConscript +++ b/src/gallium/state_trackers/python/SConscript @@ -28,20 +28,35 @@ if 'python' in env['statetrackers']: 'X11', ]) + sources = [ + 'gallium.i', + 'st_device.c', + 'st_sample.c', + ] + + drivers = [ + trace + ] + + if 'llvmpipe' in env['drivers']: + env.Tool('llvm') + sources += ['st_llvmpipe_winsys.c'] + drivers += [llvmpipe] + else: + sources += ['st_softpipe_winsys.c'] + drivers += [softpipe] + pyst = env.ConvenienceLibrary( target = 'pyst', - source = [ - 'gallium.i', - 'st_device.c', - 'st_sample.c', - 'st_softpipe_winsys.c', - ], + source = sources, ) + env['no_import_lib'] = 1 + env.SharedLibrary( target = '_gallium', source = [ 'st_hardpipe_winsys.c', ], - LIBS = [pyst, softpipe, trace] + auxiliaries + env['LIBS'], + LIBS = [pyst] + drivers + gallium + env['LIBS'], ) diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index f0254dc64ea..68797228459 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -46,6 +46,7 @@ #include "util/u_draw_quad.h" #include "util/u_tile.h" #include "util/u_math.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index 14325be8cb4..ce893dad453 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -52,11 +52,16 @@ struct st_context { cso_set_blend($self->cso, state); } - void set_sampler( unsigned index, const struct pipe_sampler_state *state ) { + void set_fragment_sampler( unsigned index, const struct pipe_sampler_state *state ) { cso_single_sampler($self->cso, index, state); cso_single_sampler_done($self->cso); } + void set_vertex_sampler( unsigned index, const struct pipe_sampler_state *state ) { + cso_single_vertex_sampler($self->cso, index, state); + cso_single_vertex_sampler_done($self->cso); + } + void set_rasterizer( const struct pipe_rasterizer_state *state ) { cso_set_rasterizer($self->cso, state); } @@ -103,6 +108,25 @@ struct st_context { $self->vs = vs; } + void set_geometry_shader( const struct pipe_shader_state *state ) { + void *gs; + + if(!state) { + cso_set_geometry_shader_handle($self->cso, NULL); + return; + } + + gs = $self->pipe->create_gs_state($self->pipe, state); + if(!gs) + return; + + if(cso_set_geometry_shader_handle($self->cso, gs) != PIPE_OK) + return; + + cso_delete_geometry_shader($self->cso, $self->gs); + $self->gs = gs; + } + /* * Parameter-like state (or properties) */ @@ -139,14 +163,24 @@ struct st_context { cso_set_viewport($self->cso, state); } - void set_sampler_texture(unsigned index, - struct pipe_texture *texture) { + void set_fragment_sampler_texture(unsigned index, + struct pipe_texture *texture) { if(!texture) texture = $self->default_texture; - pipe_texture_reference(&$self->sampler_textures[index], texture); - $self->pipe->set_fragment_sampler_textures($self->pipe, + pipe_texture_reference(&$self->fragment_sampler_textures[index], texture); + $self->pipe->set_fragment_sampler_textures($self->pipe, PIPE_MAX_SAMPLERS, - $self->sampler_textures); + $self->fragment_sampler_textures); + } + + void set_vertex_sampler_texture(unsigned index, + struct pipe_texture *texture) { + if(!texture) + texture = $self->default_texture; + pipe_texture_reference(&$self->vertex_sampler_textures[index], texture); + $self->pipe->set_vertex_sampler_textures($self->pipe, + PIPE_MAX_VERTEX_SAMPLERS, + $self->vertex_sampler_textures); } void set_vertex_buffer(unsigned index, diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i index 2dc995adb07..0eba488a078 100644 --- a/src/gallium/state_trackers/python/p_device.i +++ b/src/gallium/state_trackers/python/p_device.i @@ -87,6 +87,10 @@ struct st_device { enum pipe_texture_target target, unsigned tex_usage, unsigned geom_flags ) { + /* We can't really display surfaces with the python statetracker so mask + * out that usage */ + tex_usage &= ~PIPE_TEXTURE_USAGE_DISPLAY_TARGET; + return $self->screen->is_format_supported( $self->screen, format, target, @@ -110,6 +114,11 @@ struct st_device { unsigned tex_usage = 0 ) { struct pipe_texture templat; + + /* We can't really display surfaces with the python statetracker so mask + * out that usage */ + tex_usage &= ~PIPE_TEXTURE_USAGE_DISPLAY_TARGET; + memset(&templat, 0, sizeof(templat)); templat.format = format; templat.width0 = width; @@ -118,6 +127,7 @@ struct st_device { templat.last_level = last_level; templat.target = target; templat.tex_usage = tex_usage; + return $self->screen->texture_create($self->screen, &templat); } diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i index 1de7f86a3c7..761587dc533 100644 --- a/src/gallium/state_trackers/python/p_texture.i +++ b/src/gallium/state_trackers/python/p_texture.i @@ -132,8 +132,8 @@ struct st_surface struct pipe_transfer *transfer; unsigned stride; - stride = pf_get_stride(texture->format, w); - *LENGTH = pf_get_nblocksy(texture->format, h) * stride; + stride = util_format_get_stride(texture->format, w); + *LENGTH = util_format_get_nblocksy(texture->format, h) * stride; *STRING = (char *) malloc(*LENGTH); if(!*STRING) return; @@ -159,9 +159,9 @@ struct st_surface struct pipe_transfer *transfer; if(stride == 0) - stride = pf_get_stride(texture->format, w); + stride = util_format_get_stride(texture->format, w); - if(LENGTH < pf_get_nblocksy(texture->format, h) * stride) + if(LENGTH < util_format_get_nblocksy(texture->format, h) * stride) SWIG_exception(SWIG_ValueError, "offset must be smaller than buffer size"); transfer = screen->get_tex_transfer(screen, diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py index 222fef7d9d2..b61d47d6456 100755 --- a/src/gallium/state_trackers/python/retrace/interpreter.py +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -52,10 +52,10 @@ def make_image(surface, x=None, y=None, w=None, h=None): w = surface.width - x if h is None: h = surface.height - y - data = surface.get_tile_rgba8(0, 0, surface.width, surface.height) + data = surface.get_tile_rgba8(x, y, surface.width, surface.height) import Image - outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1) + outimage = Image.fromstring('RGBA', (w, h), data, "raw", 'RGBA', 0, 1) return outimage def save_image(filename, surface, x=None, y=None, w=None, h=None): @@ -278,9 +278,9 @@ class Screen(Object): def texture_create(self, templat): return self.real.texture_create( format = templat.format, - width = templat.width0, - height = templat.height0, - depth = templat.depth0, + width = templat.width, + height = templat.height, + depth = templat.depth, last_level = templat.last_level, target = templat.target, tex_usage = templat.tex_usage, @@ -387,9 +387,13 @@ class Context(Object): def delete_sampler_state(self, state): pass + def bind_vertex_sampler_states(self, num_states, states): + for i in range(num_states): + self.real.set_vertex_sampler(i, states[i]) + def bind_fragment_sampler_states(self, num_states, states): for i in range(num_states): - self.real.set_sampler(i, states[i]) + self.real.set_fragment_sampler(i, states[i]) def create_rasterizer_state(self, state): return state @@ -487,7 +491,11 @@ class Context(Object): def set_fragment_sampler_textures(self, num_textures, textures): for i in range(num_textures): - self.real.set_sampler_texture(i, textures[i]) + self.real.set_fragment_sampler_texture(i, textures[i]) + + def set_vertex_sampler_textures(self, num_textures, textures): + for i in range(num_textures): + self.real.set_vertex_sampler_texture(i, textures[i]) def set_vertex_buffers(self, num_buffers, buffers): self.vbufs = buffers[0:num_buffers] diff --git a/src/gallium/state_trackers/python/samples/gs.py b/src/gallium/state_trackers/python/samples/gs.py new file mode 100644 index 00000000000..a07cf557f2f --- /dev/null +++ b/src/gallium/state_trackers/python/samples/gs.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python +########################################################################## +# +# Copyright 2009 VMware +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +########################################################################## + + +from gallium import * + + +def make_image(surface): + data = surface.get_tile_rgba8(0, 0, surface.width, surface.height) + + import Image + outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1) + return outimage + +def save_image(filename, surface): + outimage = make_image(surface) + outimage.save(filename, "PNG") + +def show_image(surface): + outimage = make_image(surface) + + import Tkinter as tk + from PIL import Image, ImageTk + root = tk.Tk() + + root.title('background image') + + image1 = ImageTk.PhotoImage(outimage) + w = image1.width() + h = image1.height() + x = 100 + y = 100 + root.geometry("%dx%d+%d+%d" % (w, h, x, y)) + panel1 = tk.Label(root, image=image1) + panel1.pack(side='top', fill='both', expand='yes') + panel1.image = image1 + root.mainloop() + + +def test(dev): + ctx = dev.context_create() + + width = 255 + height = 255 + minz = 0.0 + maxz = 1.0 + + # disabled blending/masking + blend = Blend() + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.colormask = PIPE_MASK_RGBA + ctx.set_blend(blend) + + # depth/stencil/alpha + depth_stencil_alpha = DepthStencilAlpha() + depth_stencil_alpha.depth.enabled = 1 + depth_stencil_alpha.depth.writemask = 1 + depth_stencil_alpha.depth.func = PIPE_FUNC_LESS + ctx.set_depth_stencil_alpha(depth_stencil_alpha) + + # rasterizer + rasterizer = Rasterizer() + rasterizer.front_winding = PIPE_WINDING_CW + rasterizer.cull_mode = PIPE_WINDING_NONE + rasterizer.scissor = 1 + ctx.set_rasterizer(rasterizer) + + # viewport + viewport = Viewport() + scale = FloatArray(4) + scale[0] = width / 2.0 + scale[1] = -height / 2.0 + scale[2] = (maxz - minz) / 2.0 + scale[3] = 1.0 + viewport.scale = scale + translate = FloatArray(4) + translate[0] = width / 2.0 + translate[1] = height / 2.0 + translate[2] = (maxz - minz) / 2.0 + translate[3] = 0.0 + viewport.translate = translate + ctx.set_viewport(viewport) + + # samplers + sampler = Sampler() + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE + sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.normalized_coords = 1 + ctx.set_sampler(0, sampler) + + # scissor + scissor = Scissor() + scissor.minx = 0 + scissor.miny = 0 + scissor.maxx = width + scissor.maxy = height + ctx.set_scissor(scissor) + + clip = Clip() + clip.nr = 0 + ctx.set_clip(clip) + + # framebuffer + cbuf = dev.texture_create( + PIPE_FORMAT_X8R8G8B8_UNORM, + width, height, + tex_usage=PIPE_TEXTURE_USAGE_RENDER_TARGET, + ).get_surface() + zbuf = dev.texture_create( + PIPE_FORMAT_Z32_UNORM, + width, height, + tex_usage=PIPE_TEXTURE_USAGE_DEPTH_STENCIL, + ).get_surface() + fb = Framebuffer() + fb.width = width + fb.height = height + fb.nr_cbufs = 1 + fb.set_cbuf(0, cbuf) + fb.set_zsbuf(zbuf) + ctx.set_framebuffer(fb) + rgba = FloatArray(4); + rgba[0] = 0.0 + rgba[1] = 0.0 + rgba[2] = 0.0 + rgba[3] = 0.0 + ctx.clear(PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, rgba, 1.0, 0xff) + + # vertex shader + vs = Shader(''' + VERT + DCL IN[0], POSITION, CONSTANT + DCL IN[1], COLOR, CONSTANT + DCL OUT[0], POSITION, CONSTANT + DCL OUT[1], COLOR, CONSTANT + 0:MOV OUT[0], IN[0] + 1:MOV OUT[1], IN[1] + 2:END + ''') + ctx.set_vertex_shader(vs) + + gs = Shader(''' + GEOM + PROPERTY GS_INPUT_PRIMITIVE TRIANGLES + PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP + DCL IN[][0], POSITION, CONSTANT + DCL IN[][1], COLOR, CONSTANT + DCL OUT[0], POSITION, CONSTANT + DCL OUT[1], COLOR, CONSTANT + 0:MOV OUT[0], IN[0][0] + 1:MOV OUT[1], IN[0][1] + 2:EMIT + 3:MOV OUT[0], IN[1][0] + 4:MOV OUT[1], IN[1][1] + 5:EMIT + 6:MOV OUT[0], IN[2][0] + 7:MOV OUT[1], IN[2][1] + 8:EMIT + 9:ENDPRIM + 10:END + ''') + ctx.set_geometry_shader(gs) + + # fragment shader + fs = Shader(''' + FRAG + DCL IN[0], COLOR, LINEAR + DCL OUT[0], COLOR, CONSTANT + 0:MOV OUT[0], IN[0] + 1:END + ''') + ctx.set_fragment_shader(fs) + + nverts = 3 + nattrs = 2 + verts = FloatArray(nverts * nattrs * 4) + + verts[ 0] = 0.0 # x1 + verts[ 1] = 0.8 # y1 + verts[ 2] = 0.2 # z1 + verts[ 3] = 1.0 # w1 + verts[ 4] = 1.0 # r1 + verts[ 5] = 0.0 # g1 + verts[ 6] = 0.0 # b1 + verts[ 7] = 1.0 # a1 + verts[ 8] = -0.8 # x2 + verts[ 9] = -0.8 # y2 + verts[10] = 0.5 # z2 + verts[11] = 1.0 # w2 + verts[12] = 0.0 # r2 + verts[13] = 1.0 # g2 + verts[14] = 0.0 # b2 + verts[15] = 1.0 # a2 + verts[16] = 0.8 # x3 + verts[17] = -0.8 # y3 + verts[18] = 0.8 # z3 + verts[19] = 1.0 # w3 + verts[20] = 0.0 # r3 + verts[21] = 0.0 # g3 + verts[22] = 1.0 # b3 + verts[23] = 1.0 # a3 + + ctx.draw_vertices(PIPE_PRIM_TRIANGLES, + nverts, + nattrs, + verts) + + ctx.flush() + + show_image(cbuf) + #show_image(zbuf) + #save_image('cbuf.png', cbuf) + #save_image('zbuf.png', zbuf) + + + +def main(): + dev = Device() + test(dev) + + +if __name__ == '__main__': + main() diff --git a/src/gallium/state_trackers/python/samples/tri.py b/src/gallium/state_trackers/python/samples/tri.py index 87acf60366d..e5e168bdc8d 100644 --- a/src/gallium/state_trackers/python/samples/tri.py +++ b/src/gallium/state_trackers/python/samples/tri.py @@ -118,7 +118,7 @@ def test(dev): sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.normalized_coords = 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # scissor scissor = Scissor() @@ -136,10 +136,10 @@ def test(dev): cbuf = dev.texture_create( PIPE_FORMAT_X8R8G8B8_UNORM, width, height, - tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + tex_usage=PIPE_TEXTURE_USAGE_RENDER_TARGET, ).get_surface() zbuf = dev.texture_create( - PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_Z32_UNORM, width, height, tex_usage=PIPE_TEXTURE_USAGE_DEPTH_STENCIL, ).get_surface() diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index 10c7ecbd78f..d144af2447d 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -135,7 +135,9 @@ st_context_destroy(struct st_context *st_ctx) st_ctx->pipe->destroy(st_ctx->pipe); for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) - pipe_texture_reference(&st_ctx->sampler_textures[i], NULL); + pipe_texture_reference(&st_ctx->fragment_sampler_textures[i], NULL); + for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + pipe_texture_reference(&st_ctx->vertex_sampler_textures[i], NULL); pipe_texture_reference(&st_ctx->default_texture, NULL); FREE(st_ctx); @@ -276,9 +278,12 @@ st_context_create(struct st_device *st_dev) } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - pipe_texture_reference(&st_ctx->sampler_textures[i], st_ctx->default_texture); + pipe_texture_reference(&st_ctx->fragment_sampler_textures[i], st_ctx->default_texture); + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + pipe_texture_reference(&st_ctx->vertex_sampler_textures[i], st_ctx->default_texture); - cso_set_sampler_textures(st_ctx->cso, PIPE_MAX_SAMPLERS, st_ctx->sampler_textures); + cso_set_sampler_textures(st_ctx->cso, PIPE_MAX_SAMPLERS, st_ctx->fragment_sampler_textures); + cso_set_vertex_sampler_textures(st_ctx->cso, PIPE_MAX_VERTEX_SAMPLERS, st_ctx->vertex_sampler_textures); } /* vertex shader */ diff --git a/src/gallium/state_trackers/python/st_device.h b/src/gallium/state_trackers/python/st_device.h index a246b6a1f25..f786e134118 100644 --- a/src/gallium/state_trackers/python/st_device.h +++ b/src/gallium/state_trackers/python/st_device.h @@ -57,9 +57,11 @@ struct st_context { void *vs; void *fs; + void *gs; struct pipe_texture *default_texture; - struct pipe_texture *sampler_textures[PIPE_MAX_SAMPLERS]; + struct pipe_texture *fragment_sampler_textures[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_sampler_textures[PIPE_MAX_VERTEX_SAMPLERS]; unsigned num_vertex_buffers; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/state_trackers/python/st_llvmpipe_winsys.c b/src/gallium/state_trackers/python/st_llvmpipe_winsys.c new file mode 100644 index 00000000000..0096b18c994 --- /dev/null +++ b/src/gallium/state_trackers/python/st_llvmpipe_winsys.c @@ -0,0 +1,148 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * Llvmpipe support. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "llvmpipe/lp_winsys.h" +#include "st_winsys.h" + + +static boolean +llvmpipe_ws_is_displaytarget_format_supported( struct llvmpipe_winsys *ws, + enum pipe_format format ) +{ + return FALSE; +} + + +static void * +llvmpipe_ws_displaytarget_map(struct llvmpipe_winsys *ws, + struct llvmpipe_displaytarget *dt, + unsigned flags ) +{ + assert(0); + return NULL; +} + + +static void +llvmpipe_ws_displaytarget_unmap(struct llvmpipe_winsys *ws, + struct llvmpipe_displaytarget *dt ) +{ + assert(0); +} + + +static void +llvmpipe_ws_displaytarget_destroy(struct llvmpipe_winsys *winsys, + struct llvmpipe_displaytarget *dt) +{ + assert(0); +} + + +static struct llvmpipe_displaytarget * +llvmpipe_ws_displaytarget_create(struct llvmpipe_winsys *winsys, + enum pipe_format format, + unsigned width, unsigned height, + unsigned alignment, + unsigned *stride) +{ + return NULL; +} + + +static void +llvmpipe_ws_displaytarget_display(struct llvmpipe_winsys *winsys, + struct llvmpipe_displaytarget *dt, + void *context_private) +{ + assert(0); +} + + +static void +llvmpipe_ws_destroy(struct llvmpipe_winsys *winsys) +{ + FREE(winsys); +} + + +static struct pipe_screen * +st_llvmpipe_screen_create(void) +{ + static struct llvmpipe_winsys *winsys; + struct pipe_screen *screen; + + winsys = CALLOC_STRUCT(llvmpipe_winsys); + if (!winsys) + goto no_winsys; + + winsys->destroy = llvmpipe_ws_destroy; + winsys->is_displaytarget_format_supported = llvmpipe_ws_is_displaytarget_format_supported; + winsys->displaytarget_create = llvmpipe_ws_displaytarget_create; + winsys->displaytarget_map = llvmpipe_ws_displaytarget_map; + winsys->displaytarget_unmap = llvmpipe_ws_displaytarget_unmap; + winsys->displaytarget_display = llvmpipe_ws_displaytarget_display; + winsys->displaytarget_destroy = llvmpipe_ws_displaytarget_destroy; + + screen = llvmpipe_create_screen(winsys); + if (!screen) + goto no_screen; + + return screen; + +no_screen: + FREE(winsys); +no_winsys: + return NULL; +} + + +static struct pipe_context * +st_llvmpipe_context_create(struct pipe_screen *screen) +{ + return llvmpipe_create(screen); +} + + +const struct st_winsys st_softpipe_winsys = { + &st_llvmpipe_screen_create, + &st_llvmpipe_context_create, +}; diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py index 35673b3ec92..8d3bf9d4d7e 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py @@ -96,7 +96,7 @@ def test(dev, name): sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.normalized_coords = 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # scissor scissor = Scissor() @@ -114,7 +114,7 @@ def test(dev, name): cbuf = dev.texture_create( PIPE_FORMAT_X8R8G8B8_UNORM, width, height, - tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + tex_usage=PIPE_TEXTURE_USAGE_RENDER_TARGET, ).get_surface() fb = Framebuffer() fb.width = width diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py index 5be1ca80f30..01bf5a3210d 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py @@ -96,7 +96,7 @@ def test(dev, name): sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.normalized_coords = 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # scissor scissor = Scissor() @@ -114,7 +114,7 @@ def test(dev, name): cbuf = dev.texture_create( PIPE_FORMAT_X8R8G8B8_UNORM, width, height, - tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + tex_usage=PIPE_TEXTURE_USAGE_RENDER_TARGET, ).get_surface() fb = Framebuffer() fb.width = width diff --git a/src/gallium/state_trackers/python/tests/texture_render.py b/src/gallium/state_trackers/python/tests/texture_render.py index 8a2db9dbcff..79287f2cace 100755 --- a/src/gallium/state_trackers/python/tests/texture_render.py +++ b/src/gallium/state_trackers/python/tests/texture_render.py @@ -144,8 +144,8 @@ class TextureTest(TestCase): sampler.normalized_coords = 1 sampler.min_lod = 0 sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 - ctx.set_sampler(0, sampler) - ctx.set_sampler_texture(0, src_texture) + ctx.set_fragment_sampler(0, sampler) + ctx.set_fragment_sampler_texture(0, src_texture) # framebuffer cbuf_tex = dev.texture_create( diff --git a/src/gallium/state_trackers/python/tests/texture_sample.py b/src/gallium/state_trackers/python/tests/texture_sample.py index 92a6c4dfb9f..520961c8051 100755 --- a/src/gallium/state_trackers/python/tests/texture_sample.py +++ b/src/gallium/state_trackers/python/tests/texture_sample.py @@ -169,7 +169,7 @@ class TextureColorSampleTest(TestCase): sampler.normalized_coords = 1 sampler.min_lod = 0 sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # texture texture = dev.texture_create( @@ -189,7 +189,7 @@ class TextureColorSampleTest(TestCase): zslice = zslice, ).sample_rgba(expected_rgba) - ctx.set_sampler_texture(0, texture) + ctx.set_fragment_sampler_texture(0, texture) # framebuffer cbuf_tex = dev.texture_create( @@ -359,7 +359,7 @@ class TextureDepthSampleTest(TestCase): sampler.normalized_coords = 1 sampler.min_lod = 0 sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # texture texture = dev.texture_create( @@ -379,7 +379,7 @@ class TextureDepthSampleTest(TestCase): zslice = zslice, ).sample_rgba(expected_rgba) - ctx.set_sampler_texture(0, texture) + ctx.set_fragment_sampler_texture(0, texture) # framebuffer cbuf_tex = dev.texture_create( diff --git a/src/gallium/state_trackers/vega/Makefile b/src/gallium/state_trackers/vega/Makefile index b8c805b06c4..fc97bf51f8f 100644 --- a/src/gallium/state_trackers/vega/Makefile +++ b/src/gallium/state_trackers/vega/Makefile @@ -61,14 +61,7 @@ VG_MINOR = 0 VG_TINY = 0 GALLIUM_LIBS = \ - $(GALLIUM)/src/gallium/auxiliary/pipebuffer/libpipebuffer.a \ - $(GALLIUM)/src/gallium/auxiliary/sct/libsct.a \ - $(GALLIUM)/src/gallium/auxiliary/draw/libdraw.a \ - $(GALLIUM)/src/gallium/auxiliary/rtasm/librtasm.a \ - $(GALLIUM)/src/gallium/auxiliary/translate/libtranslate.a \ - $(GALLIUM)/src/gallium/auxiliary/cso_cache/libcso_cache.a \ - $(GALLIUM)/src/gallium/auxiliary/util/libutil.a \ - $(GALLIUM)/src/gallium/auxiliary/tgsi/libtgsi.a + $(GALLIUM)/src/gallium/auxiliary/libgallium.a .SUFFIXES : .cpp diff --git a/src/gallium/state_trackers/vega/api_path.c b/src/gallium/state_trackers/vega/api_path.c index a6b7a2bb93a..15ac1900f4b 100644 --- a/src/gallium/state_trackers/vega/api_path.c +++ b/src/gallium/state_trackers/vega/api_path.c @@ -164,8 +164,7 @@ void vgAppendPathData(VGPath dstPath, return; } for (i = 0; i < numSegments; ++i) { - if (pathSegments[i] < VG_CLOSE_PATH || - pathSegments[i] > VG_LCWARC_TO_REL) { + if (pathSegments[i] > VG_LCWARC_TO_REL) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } diff --git a/src/gallium/state_trackers/vega/arc.c b/src/gallium/state_trackers/vega/arc.c index 8b04d21ea76..2d123408702 100644 --- a/src/gallium/state_trackers/vega/arc.c +++ b/src/gallium/state_trackers/vega/arc.c @@ -528,7 +528,6 @@ static INLINE int num_beziers_needed(struct arc *arc) double threshold = 0.05; VGboolean found = VG_FALSE; int n = 1; - int i; double min_eta, max_eta; min_eta = MIN2(arc->eta1, arc->eta2); @@ -538,6 +537,7 @@ static INLINE int num_beziers_needed(struct arc *arc) double d_eta = (max_eta - min_eta) / n; if (d_eta <= 0.5 * M_PI) { double eta_b = min_eta; + int i; found = VG_TRUE; for (i = 0; found && (i < n); ++i) { double etaA = eta_b; diff --git a/src/gallium/state_trackers/vega/bezier.c b/src/gallium/state_trackers/vega/bezier.c index 0d5504004cc..5769e8ea868 100644 --- a/src/gallium/state_trackers/vega/bezier.c +++ b/src/gallium/state_trackers/vega/bezier.c @@ -256,7 +256,6 @@ static enum shift_result good_offset(const struct bezier *b1, const float max_dist_normal = threshold*offset; const float spacing = 0.25; float i; - for (i = spacing; i < 0.99; i += spacing) { float p1[2],p2[2], d, l; float normal[2]; diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 278ba6d46eb..1112ad9839d 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -644,7 +644,7 @@ VGint image_sampler_filter(struct vg_context *ctx) return PIPE_TEX_FILTER_NEAREST; break; case VG_IMAGE_QUALITY_BETTER: - /*return PIPE_TEX_FILTER_ANISO;*/ + /* possibly use anisotropic filtering */ return PIPE_TEX_FILTER_LINEAR; break; default: diff --git a/src/gallium/state_trackers/vega/stroker.c b/src/gallium/state_trackers/vega/stroker.c index 1b92d2b5c62..68a52029db0 100644 --- a/src/gallium/state_trackers/vega/stroker.c +++ b/src/gallium/state_trackers/vega/stroker.c @@ -476,7 +476,7 @@ static enum intersection_type line_intersect(const VGfloat *l1, const VGfloat *l2, float *intersection_point) { - VGfloat isect[2]; + VGfloat isect[2] = { 0 }; enum intersection_type type; VGboolean dx_zero, ldx_zero; @@ -649,7 +649,7 @@ static void create_joins(struct stroker *stroker, VGfloat prev_line[] = {stroker->back2_x, stroker->back2_y, stroker->back1_x, stroker->back1_y}; - VGfloat isect[2]; + VGfloat isect[2] = { 0 }; enum intersection_type type = line_intersect(prev_line, next_line, isect); if (join == SquareJoin) { diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c index e5039132758..ff80aab03a3 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.c +++ b/src/gallium/state_trackers/vega/vg_tracker.c @@ -36,6 +36,9 @@ #include "util/u_math.h" #include "util/u_rect.h" +/* advertise OpenVG support */ +PUBLIC const int st_api_OpenVG = 1; + static struct pipe_texture * create_texture(struct pipe_context *pipe, enum pipe_format format, VGint width, VGint height) @@ -368,14 +371,15 @@ void st_unreference_framebuffer(struct st_framebuffer *stfb) /* FIXME */ } -void st_make_current(struct vg_context *st, - struct st_framebuffer *draw, - struct st_framebuffer *read) +boolean st_make_current(struct vg_context *st, + struct st_framebuffer *draw, + struct st_framebuffer *read) { vg_set_current_context(st); if (st) { st->draw_buffer = draw; } + return VG_TRUE; } struct vg_context *st_get_current(void) @@ -425,3 +429,8 @@ int st_unbind_texture_surface(struct pipe_surface *ps, int target, int level) { return 0; } + +st_proc st_get_proc_address(const char *procname) +{ + return NULL; +} diff --git a/src/gallium/state_trackers/vega/vg_tracker.h b/src/gallium/state_trackers/vega/vg_tracker.h index 5457631106f..c1196954a76 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.h +++ b/src/gallium/state_trackers/vega/vg_tracker.h @@ -45,15 +45,19 @@ struct pipe_fence_handle; struct pipe_surface; +PUBLIC struct vg_context *st_create_context(struct pipe_context *pipe, const void *visual, struct vg_context *share); +PUBLIC void st_destroy_context( struct vg_context *st ); +PUBLIC void st_copy_context_state(struct vg_context *dst, struct vg_context *src, uint mask); +PUBLIC struct st_framebuffer *st_create_framebuffer(const void *visual, enum pipe_format colorFormat, enum pipe_format depthFormat, @@ -61,47 +65,63 @@ struct st_framebuffer *st_create_framebuffer(const void *visual, uint width, uint height, void *privateData); +PUBLIC void st_resize_framebuffer(struct st_framebuffer *stfb, uint width, uint height); +PUBLIC void st_set_framebuffer_surface(struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface *surf); +PUBLIC void st_get_framebuffer_dimensions( struct st_framebuffer *stfb, uint *width, uint *height); +PUBLIC int st_bind_texture_surface(struct pipe_surface *ps, int target, int level, enum pipe_format format); +PUBLIC int st_unbind_texture_surface(struct pipe_surface *ps, int target, int level); +PUBLIC int st_get_framebuffer_surface(struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface **surf); +PUBLIC int st_get_framebuffer_texture(struct st_framebuffer *stfb, uint surfIndex, struct pipe_texture **tex); +PUBLIC void *st_framebuffer_private(struct st_framebuffer *stfb); +PUBLIC void st_unreference_framebuffer(struct st_framebuffer *stfb); -void st_make_current(struct vg_context *st, - struct st_framebuffer *draw, - struct st_framebuffer *read); +PUBLIC +boolean st_make_current(struct vg_context *st, + struct st_framebuffer *draw, + struct st_framebuffer *read); +PUBLIC struct vg_context *st_get_current(void); +PUBLIC void st_flush(struct vg_context *st, uint pipeFlushFlags, struct pipe_fence_handle **fence); +PUBLIC void st_finish(struct vg_context *st); +PUBLIC void st_notify_swapbuffers(struct st_framebuffer *stfb); +PUBLIC void st_notify_swapbuffers_complete(struct st_framebuffer *stfb); /** Generic function type */ typedef void (*st_proc)(); +PUBLIC st_proc st_get_proc_address(const char *procname); #endif diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript index b05944a33b3..352c087475e 100644 --- a/src/gallium/state_trackers/wgl/SConscript +++ b/src/gallium/state_trackers/wgl/SConscript @@ -11,10 +11,11 @@ if env['platform'] in ['windows']: '.', ]) - env.Append(CPPDEFINES = [ + env.AppendUnique(CPPDEFINES = [ '_GDI32_', # prevent wgl* being declared __declspec(dllimport) 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers 'WIN32_THREADS', # use Win32 thread API + 'WIN32_LEAN_AND_MEAN', # http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx ]) sources = [ diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index c776faa53f8..650d2c0d1db 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -71,6 +71,8 @@ struct crtc_private static void crtc_dpms(xf86CrtcPtr crtc, int mode) { + /* ScrnInfoPtr pScrn = crtc->scrn; */ + switch (mode) { case DPMSModeOn: case DPMSModeStandby: @@ -121,7 +123,8 @@ crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, drm_mode.vrefresh = mode->VRefresh; if (!mode->name) xf86SetModeDefaultName(mode); - strncpy(drm_mode.name, mode->name, DRM_DISPLAY_MODE_LEN); + strncpy(drm_mode.name, mode->name, DRM_DISPLAY_MODE_LEN - 1); + drm_mode.name[DRM_DISPLAY_MODE_LEN - 1] = '\0'; ret = drmModeSetCrtc(ms->fd, drm_crtc->crtc_id, ms->fb_id, x, y, &drm_connector->connector_id, 1, &drm_mode); @@ -147,18 +150,23 @@ crtc_gamma_set(xf86CrtcPtr crtc, CARD16 * red, CARD16 * green, CARD16 * blue, static void * crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height) { + /* ScrnInfoPtr pScrn = crtc->scrn; */ + return NULL; } static PixmapPtr crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height) { + /* ScrnInfoPtr pScrn = crtc->scrn; */ + return NULL; } static void crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr rotate_pixmap, void *data) { + /* ScrnInfoPtr pScrn = crtc->scrn; */ } /* diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index 8a24aa10a3c..b02fe68f313 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -181,8 +181,7 @@ drv_crtc_resize(ScrnInfoPtr pScrn, int width, int height) if (!pScreen->ModifyPixmapHeader(rootPixmap, width, height, -1, -1, -1, NULL)) return FALSE; - /* HW dependent - FIXME */ - pScrn->displayWidth = pScrn->virtualX; + pScrn->displayWidth = rootPixmap->devKind / (rootPixmap->drawable.bitsPerPixel / 8); /* now create new frontbuffer */ return ms->create_front_buffer(pScrn) && ms->bind_front_buffer(pScrn); @@ -220,6 +219,12 @@ static Bool drv_init_resource_management(ScrnInfoPtr pScrn) { modesettingPtr ms = modesettingPTR(pScrn); + /* + ScreenPtr pScreen = pScrn->pScreen; + PixmapPtr rootPixmap = pScreen->GetScreenPixmap(pScreen); + Bool fbAccessDisabled; + CARD8 *fbstart; + */ if (ms->screen || ms->kms) return TRUE; @@ -249,9 +254,19 @@ static Bool drv_close_resource_management(ScrnInfoPtr pScrn) { modesettingPtr ms = modesettingPTR(pScrn); + int i; - if (ms->screen) + if (ms->screen) { + assert(ms->ctx == NULL); + + for (i = 0; i < XORG_NR_FENCES; i++) { + if (ms->fence[i]) { + ms->screen->fence_finish(ms->screen, ms->fence[i], 0); + ms->screen->fence_reference(ms->screen, &ms->fence[i], NULL); + } + } ms->screen->destroy(ms->screen); + } ms->screen = NULL; if (ms->api && ms->api->destroy) @@ -461,7 +476,7 @@ static void drv_block_handler(int i, pointer blockData, pointer pTimeout, * quite small. Let us get a fair way ahead of hardware before * throttling. */ - for (j = 0; j < XORG_NR_FENCES; j++) + for (j = 0; j < XORG_NR_FENCES - 1; j++) ms->screen->fence_reference(ms->screen, &ms->fence[j], ms->fence[j+1]); @@ -915,6 +930,12 @@ drv_destroy_front_buffer_kms(ScrnInfoPtr pScrn) ScreenPtr pScreen = pScrn->pScreen; PixmapPtr rootPixmap = pScreen->GetScreenPixmap(pScreen); + /* XXX Do something with the rootPixmap. + * This currently works fine but if we are getting crashes in + * the fb functions after VT switches maybe look more into it. + */ + (void)rootPixmap; + if (!ms->root_bo) return TRUE; diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index d5c005ebadd..d9432babf18 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -516,6 +516,7 @@ ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, #endif debug_assert(priv == exa->copy.dst); + (void) priv; if (exa->copy.use_surface_copy) { /* XXX: consider exposing >1 box in surface_copy interface. @@ -810,34 +811,7 @@ xorg_exa_set_shared_usage(PixmapPtr pPixmap) return 0; } -unsigned -xorg_exa_get_pixmap_handle(PixmapPtr pPixmap, unsigned *stride_out) -{ - ScreenPtr pScreen = pPixmap->drawable.pScreen; - ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; - modesettingPtr ms = modesettingPTR(pScrn); - struct exa_pixmap_priv *priv; - unsigned handle; - unsigned stride; - if (!ms->exa) { - FatalError("NO MS->EXA\n"); - return 0; - } - - priv = exaGetPixmapDriverPrivate(pPixmap); - - if (!priv) { - FatalError("NO PIXMAP PRIVATE\n"); - return 0; - } - - ms->api->local_handle_from_texture(ms->api, ms->screen, priv->tex, &stride, &handle); - if (stride_out) - *stride_out = stride; - - return handle; -} static Bool size_match( int width, int tex_width ) @@ -1019,6 +993,9 @@ xorg_exa_close(ScrnInfoPtr pScrn) if (exa->pipe) exa->pipe->destroy(exa->pipe); + exa->pipe = NULL; + /* Since this was shared be proper with the pointer */ + ms->ctx = NULL; exaDriverFini(pScrn->pScreen); xfree(exa); diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c index 89b794a09ac..bed17caab77 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c @@ -47,22 +47,22 @@ static void print_fs_traits(int fs_traits) { const char *strings[] = { - "FS_COMPOSITE", /* = 1 << 0 */ - "FS_MASK", /* = 1 << 1 */ - "FS_SOLID_FILL", /* = 1 << 2 */ - "FS_LINGRAD_FILL", /* = 1 << 3 */ - "FS_RADGRAD_FILL", /* = 1 << 4 */ - "FS_CA_FULL", /* = 1 << 5 - src.rgba * mask.rgba */ - "FS_CA_SRCALPHA", /* = 1 << 6 - src.aaaa * mask.rgba */ - "FS_YUV", /* = 1 << 7 */ - "FS_SRC_REPEAT_NONE", /* = 1 << 8 */ - "FS_MASK_REPEAT_NONE",/* = 1 << 9 */ - "FS_SRC_SWIZZLE_RGB", /* = 1 << 10 */ - "FS_MASK_SWIZZLE_RGB",/* = 1 << 11 */ - "FS_SRC_SET_ALPHA", /* = 1 << 12 */ - "FS_MASK_SET_ALPHA", /* = 1 << 13 */ - "FS_SRC_LUMINANCE", /* = 1 << 14 */ - "FS_MASK_LUMINANCE", /* = 1 << 15 */ + "FS_COMPOSITE", /* = 1 << 0, */ + "FS_MASK", /* = 1 << 1, */ + "FS_SOLID_FILL", /* = 1 << 2, */ + "FS_LINGRAD_FILL", /* = 1 << 3, */ + "FS_RADGRAD_FILL", /* = 1 << 4, */ + "FS_CA_FULL", /* = 1 << 5, */ /* src.rgba * mask.rgba */ + "FS_CA_SRCALPHA", /* = 1 << 6, */ /* src.aaaa * mask.rgba */ + "FS_YUV", /* = 1 << 7, */ + "FS_SRC_REPEAT_NONE", /* = 1 << 8, */ + "FS_MASK_REPEAT_NONE",/* = 1 << 9, */ + "FS_SRC_SWIZZLE_RGB", /* = 1 << 10, */ + "FS_MASK_SWIZZLE_RGB",/* = 1 << 11, */ + "FS_SRC_SET_ALPHA", /* = 1 << 12, */ + "FS_MASK_SET_ALPHA", /* = 1 << 13, */ + "FS_SRC_LUMINANCE", /* = 1 << 14, */ + "FS_MASK_LUMINANCE", /* = 1 << 15, */ }; int i, k; debug_printf("%s: ", __func__); @@ -492,6 +492,7 @@ create_fs(struct pipe_context *pipe, /* it has to be either a fill, a composite op or a yuv conversion */ debug_assert((is_fill ^ is_composite) ^ is_yuv); + (void) is_yuv; out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index f53b53bc62a..8f729b565b1 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -438,6 +438,7 @@ void renderer_copy_prepare(struct xorg_renderer *r, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); + (void) screen; /* set misc state we care about */ diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index c0cfbe60616..4d5d4780dc4 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -135,9 +135,6 @@ typedef struct _modesettingRec struct pipe_texture * xorg_exa_get_texture(PixmapPtr pPixmap); -unsigned -xorg_exa_get_pixmap_handle(PixmapPtr pPixmap, unsigned *stride); - int xorg_exa_set_displayed_usage(PixmapPtr pPixmap); diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index 6b5a41a3727..5bf0e94b627 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -486,8 +486,11 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, int dxo, dyo; Bool hdtv; int x, y, w, h; - struct exa_pixmap_priv *dst = exaGetPixmapDriverPrivate(pPixmap); - struct pipe_surface *dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); + struct exa_pixmap_priv *dst; + struct pipe_surface *dst_surf = NULL; + + exaMoveInPixmap(pPixmap); + dst = exaGetPixmapDriverPrivate(pPixmap); if (dst && !dst->tex) { xorg_exa_set_shared_usage(pPixmap); @@ -497,6 +500,7 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, if (!dst || !dst->tex) XORG_FALLBACK("Xv destination %s", !dst ? "!dst" : "!dst->tex"); + dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); hdtv = ((src_w >= RES_720P_X) && (src_h >= RES_720P_Y)); REGION_TRANSLATE(pScrn->pScreen, dstRegion, -pPixmap->screen_x, @@ -516,7 +520,6 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, bind_samplers(pPriv); setup_fs_video_constants(pPriv->r, hdtv); - exaMoveInPixmap(pPixmap); DamageDamageRegion(&pPixmap->drawable, dstRegion); while (nbox--) { diff --git a/src/gallium/winsys/drm/Makefile.egl_g3d b/src/gallium/winsys/drm/Makefile.egl_g3d new file mode 100644 index 00000000000..3ce27258529 --- /dev/null +++ b/src/gallium/winsys/drm/Makefile.egl_g3d @@ -0,0 +1,64 @@ +# src/gallium/winsys/drm/Makefile.egl_g3d + +# The driver Makefile should define +# +# EGL_DRIVER_NAME, the name of the driver +# EGL_DRIVER_SOURCES, the sources of the driver +# EGL_DRIVER_LIBS, extra libraries needed by the driver +# EGL_DRIVER_PIPES, the pipe drivers of the driver +# +# before including this file. + +EGL_DRIVER_OBJECTS = $(EGL_DRIVER_SOURCES:.c=.o) + +common_LIBS = -ldrm -lm -ldl + +x11_ST = $(TOP)/src/gallium/state_trackers/egl_g3d/libeglx11.a +x11_LIBS = $(common_LIBS) -lX11 -lXext -lXfixes + +kms_ST = $(TOP)/src/gallium/state_trackers/egl_g3d/libeglkms.a +kms_LIBS = $(common_LIBS) + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + + +##### TARGETS ##### + +EGL_DISPLAY_DRIVERS = $(foreach dpy, $(EGL_DISPLAYS), egl_$(dpy)_$(EGL_DRIVER_NAME).so) + +LIB_GALLIUM_DIR = $(TOP)/$(LIB_DIR)/gallium +EGL_DISPLAY_LIBS = $(foreach drv, $(EGL_DISPLAY_DRIVERS), $(LIB_GALLIUM_DIR)/$(drv)) + +default: $(EGL_DISPLAY_LIBS) + +$(EGL_DISPLAY_LIBS): $(LIB_GALLIUM_DIR)/%.so: %.so + @mkdir -p $(LIB_GALLIUM_DIR) + $(INSTALL) $^ $(LIB_GALLIUM_DIR) + +define mklib-egl +$(MKLIB) -noprefix -o $@ $(EGL_DRIVER_OBJECTS) \ + -Wl,--whole-archive $($(1)_ST) -Wl,--no-whole-archive \ + $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $($(1)_LIBS) $(EGL_DRIVER_LIBS) +endef + +egl_x11_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(x11_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile + $(call mklib-egl,x11) + +egl_kms_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(kms_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile + $(call mklib-egl,kms) + +clean: + -rm -f $(EGL_DRIVER_OBJECTS) + -rm -f $(EGL_DISPLAY_DRIVERS) + +install: $(EGL_DISPLAY_LIBS) + @$(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) + @echo "Install $(EGL_DISPLAY_DRIVERS)" + @for lib in "$(EGL_DISPLAY_LIBS)"; do \ + $(MINSTALL) -m 755 "$$lib" $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR); \ + done + +depend: diff --git a/src/gallium/winsys/drm/i965/dri/SConscript b/src/gallium/winsys/drm/i965/dri/SConscript index 233ef464be5..a99533fd245 100644 --- a/src/gallium/winsys/drm/i965/dri/SConscript +++ b/src/gallium/winsys/drm/i965/dri/SConscript @@ -14,6 +14,6 @@ drivers = [ env.LoadableModule( target ='i965_dri.so', source = COMMON_GALLIUM_SOURCES, - LIBS = drivers + mesa + auxiliaries + env['LIBS'], + LIBS = drivers + mesa + gallium + env['LIBS'], SHLIBPREFIX = '', ) diff --git a/src/gallium/winsys/drm/i965/egl_g3d/Makefile b/src/gallium/winsys/drm/i965/egl_g3d/Makefile new file mode 100644 index 00000000000..dd2efe24855 --- /dev/null +++ b/src/gallium/winsys/drm/i965/egl_g3d/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +EGL_DRIVER_NAME = i965 +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = -ldrm_intel + +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/i965/gem/libi965drm.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/i965/libi965.a + +include ../../Makefile.egl_g3d diff --git a/src/gallium/winsys/drm/i965/egl_g3d/dummy.c b/src/gallium/winsys/drm/i965/egl_g3d/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/i965/egl_g3d/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c index fc9678d2b62..9a2203fc6d7 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c @@ -228,6 +228,7 @@ destroy(struct drm_api *api) struct drm_api i965_libdrm_api = { + .name = "i965", .create_context = i965_libdrm_create_context, .create_screen = i965_libdrm_create_screen, .texture_from_shared_handle = i965_libdrm_texture_from_shared_handle, diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript index b1b654d9f8b..104e987083f 100644 --- a/src/gallium/winsys/drm/intel/dri/SConscript +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -15,6 +15,6 @@ drivers = [ env.LoadableModule( target ='i915_dri.so', source = COMMON_GALLIUM_SOURCES, - LIBS = drivers + mesa + auxiliaries + env['LIBS'], + LIBS = drivers + mesa + gallium + env['LIBS'], SHLIBPREFIX = '', ) diff --git a/src/gallium/winsys/drm/intel/egl_g3d/Makefile b/src/gallium/winsys/drm/intel/egl_g3d/Makefile new file mode 100644 index 00000000000..cdbb680773c --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl_g3d/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +EGL_DRIVER_NAME = i915 +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = -ldrm_intel + +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/intel/gem/libinteldrm.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/i915/libi915.a + +include ../../Makefile.egl_g3d diff --git a/src/gallium/winsys/drm/intel/egl_g3d/dummy.c b/src/gallium/winsys/drm/intel/egl_g3d/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl_g3d/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c index 5ed2a10af1c..450ae09b345 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c @@ -195,6 +195,7 @@ destroy(struct drm_api *api) struct drm_api intel_drm_api = { + .name = "i915", .create_context = intel_drm_create_context, .create_screen = intel_drm_create_screen, .texture_from_shared_handle = intel_drm_texture_from_shared_handle, diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index 7106a06492d..4b2c6a1025e 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -87,6 +87,7 @@ nouveau_drm_create_screen(struct drm_api *api, int fd, case 0x60: init = nv40_screen_create; break; + case 0x50: case 0x80: case 0x90: case 0xa0: @@ -164,6 +165,7 @@ nouveau_drm_create_context(struct drm_api *api, struct pipe_screen *pscreen) case 0x60: init = nv40_create; break; + case 0x50: case 0x80: case 0x90: case 0xa0: @@ -252,6 +254,7 @@ nouveau_drm_handle_from_pt(struct drm_api *api, struct pipe_screen *pscreen, } struct drm_api drm_api_hooks = { + .name = "nouveau", .create_screen = nouveau_drm_create_screen, .create_context = nouveau_drm_create_context, .texture_from_shared_handle = nouveau_drm_pt_from_name, diff --git a/src/gallium/winsys/drm/nouveau/egl_g3d/Makefile b/src/gallium/winsys/drm/nouveau/egl_g3d/Makefile new file mode 100644 index 00000000000..865a5d56a97 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/egl_g3d/Makefile @@ -0,0 +1,19 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +EGL_DRIVER_NAME = nouveau +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = -ldrm_nouveau + +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ + $(TOP)/src/gallium/drivers/nv04/libnv04.a \ + $(TOP)/src/gallium/drivers/nv10/libnv10.a \ + $(TOP)/src/gallium/drivers/nv20/libnv20.a \ + $(TOP)/src/gallium/drivers/nv30/libnv30.a \ + $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a \ + $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a + +include ../../Makefile.egl_g3d diff --git a/src/gallium/winsys/drm/nouveau/egl_g3d/dummy.c b/src/gallium/winsys/drm/nouveau/egl_g3d/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/egl_g3d/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index d2367b245a2..385fa857b56 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -146,16 +146,17 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, (struct radeon_pipe_buffer*)buffer; int write = 0; - if (radeon_bo_is_referenced_by_cs(radeon_buffer->bo, priv->cs)) { - priv->flush_cb(priv->flush_data); - } - if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { uint32_t domain; if (radeon_bo_is_busy(radeon_buffer->bo, &domain)) return NULL; } + + if (radeon_bo_is_referenced_by_cs(radeon_buffer->bo, priv->cs)) { + priv->flush_cb(priv->flush_data); + } + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) { write = 1; } @@ -280,58 +281,3 @@ struct radeon_winsys* radeon_pipe_winsys(int fd) return radeon_ws; } -#if 0 -static struct pipe_buffer *radeon_buffer_from_handle(struct radeon_screen *radeon_screen, - uint32_t handle) -{ - struct radeon_pipe_buffer *radeon_buffer; - struct radeon_bo *bo = NULL; - - bo = radeon_bo_open(radeon_screen->bom, handle, 0, 0, 0, 0); - if (bo == NULL) { - return NULL; - } - radeon_buffer = calloc(1, sizeof(struct radeon_pipe_buffer)); - if (radeon_buffer == NULL) { - radeon_bo_unref(bo); - return NULL; - } - pipe_reference_init(&radeon_buffer->base.reference, 1); - radeon_buffer->base.usage = PIPE_BUFFER_USAGE_PIXEL; - radeon_buffer->bo = bo; - return &radeon_buffer->base; -} - -struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context, - uint32_t handle, - enum pipe_format format, - int w, int h, int pitch) -{ - struct pipe_screen *pipe_screen = radeon_context->pipe_screen; - struct pipe_winsys *pipe_winsys = radeon_context->pipe_winsys; - struct pipe_texture tmpl; - struct pipe_surface *ps; - struct pipe_texture *pt; - struct pipe_buffer *pb; - - pb = radeon_buffer_from_handle(radeon_context->radeon_screen, handle); - if (pb == NULL) { - return NULL; - } - memset(&tmpl, 0, sizeof(tmpl)); - tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET; - tmpl.target = PIPE_TEXTURE_2D; - tmpl.width0 = w; - tmpl.height0 = h; - tmpl.depth0 = 1; - tmpl.format = format; - - pt = pipe_screen->texture_blanket(pipe_screen, &tmpl, &pitch, pb); - if (pt == NULL) { - pipe_buffer_reference(&pb, NULL); - } - ps = pipe_screen->get_tex_surface(pipe_screen, pt, 0, 0, 0, - PIPE_BUFFER_USAGE_GPU_WRITE); - return ps; -} -#endif diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index dec7c065036..851c2236979 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -206,7 +206,7 @@ static boolean radeon_shared_handle_from_texture(struct drm_api *api, int retval, fd; struct drm_gem_flink flink; struct radeon_pipe_buffer* radeon_buffer; - struct pipe_buffer *buffer; + struct pipe_buffer *buffer = NULL; if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { return FALSE; @@ -239,7 +239,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api, unsigned *stride, unsigned *handle) { - struct pipe_buffer *buffer; + struct pipe_buffer *buffer = NULL; if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { return FALSE; } @@ -252,6 +252,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api, } struct drm_api drm_api_hooks = { + .name = "radeon", .create_screen = radeon_create_screen, .create_context = radeon_create_context, .texture_from_shared_handle = radeon_texture_from_shared_handle, diff --git a/src/gallium/winsys/drm/radeon/dri/Makefile b/src/gallium/winsys/drm/radeon/dri/Makefile index a9889444de8..eaa34180321 100644 --- a/src/gallium/winsys/drm/radeon/dri/Makefile +++ b/src/gallium/winsys/drm/radeon/dri/Makefile @@ -2,7 +2,7 @@ TOP = ../../../../../.. include $(TOP)/configs/current -LIBNAME = radeon_dri.so +LIBNAME = radeong_dri.so MINIGLX_SOURCES = diff --git a/src/gallium/winsys/drm/radeon/dri/SConscript b/src/gallium/winsys/drm/radeon/dri/SConscript index aea987a3aca..c4989d1b595 100644 --- a/src/gallium/winsys/drm/radeon/dri/SConscript +++ b/src/gallium/winsys/drm/radeon/dri/SConscript @@ -13,5 +13,5 @@ drivers = [ env.SharedLibrary( target ='radeon_dri.so', source = COMMON_GALLIUM_SOURCES, - LIBS = st_dri + radeonwinsys + mesa + drivers + auxiliaries + env['LIBS'], + LIBS = st_dri + radeonwinsys + mesa + drivers + gallium + env['LIBS'], ) diff --git a/src/gallium/winsys/drm/radeon/egl_g3d/Makefile b/src/gallium/winsys/drm/radeon/egl_g3d/Makefile new file mode 100644 index 00000000000..9027a5ff46d --- /dev/null +++ b/src/gallium/winsys/drm/radeon/egl_g3d/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +EGL_DRIVER_NAME = r300 +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = -ldrm_radeon + +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/radeon/core/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/r300/libr300.a + +include ../../Makefile.egl_g3d diff --git a/src/gallium/winsys/drm/radeon/egl_g3d/dummy.c b/src/gallium/winsys/drm/radeon/egl_g3d/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/radeon/egl_g3d/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/radeon/python/SConscript b/src/gallium/winsys/drm/radeon/python/SConscript index 3200fd8d1b0..91cae986975 100644 --- a/src/gallium/winsys/drm/radeon/python/SConscript +++ b/src/gallium/winsys/drm/radeon/python/SConscript @@ -29,5 +29,5 @@ if env['platform'] == 'linux': env.SharedLibrary( target ='_gallium', source = sources, - LIBS = [pyst] + drivers + auxiliaries + env['LIBS'], + LIBS = [pyst] + drivers + gallium + env['LIBS'], ) diff --git a/src/gallium/winsys/drm/radeon/xorg/Makefile b/src/gallium/winsys/drm/radeon/xorg/Makefile index 9fa16dab24c..0eb1b3988f3 100644 --- a/src/gallium/winsys/drm/radeon/xorg/Makefile +++ b/src/gallium/winsys/drm/radeon/xorg/Makefile @@ -1,11 +1,16 @@ -TARGET = modesetting_drv.so -CFILES = $(wildcard ./*.c) -OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) -GALLIUMDIR = ../../../.. TOP = ../../../../../.. + +GALLIUMDIR = $(TOP)/src/gallium + +TARGET = radeong_drv.so + +CFILES = $(wildcard ./*.c) + include ${TOP}/configs/current +OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) + CFLAGS = -DHAVE_CONFIG_H \ -g -Wall -Wimplicit-function-declaration -fPIC \ $(shell pkg-config --cflags pixman-1 xorg-server libdrm xproto) \ @@ -24,16 +29,21 @@ LIBS = \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) +TARGET_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET) ############################################# +all default: $(TARGET) $(TARGET_STAGING) - -all default: $(TARGET) - -$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a +$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a $(LIBS) $(TOP)/bin/mklib -noprefix -o $@ \ $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_radeon +$(TOP)/$(LIB_DIR)/gallium: + mkdir -p $@ + +$(TARGET_STAGING): $(TARGET) $(TOP)/$(LIB_DIR)/gallium + $(INSTALL) $(TARGET) $(TOP)/$(LIB_DIR)/gallium + clean: rm -rf $(OBJECTS) $(TARGET) diff --git a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c index 837f2aa8fec..bb76cc03499 100644 --- a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c +++ b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c @@ -53,7 +53,7 @@ static PciChipsets radeon_xorg_pci_devices[] = { }; static XF86ModuleVersionInfo radeon_xorg_version = { - "modesetting", + "radeong", MODULEVENDORSTRING, MODINFOSTRING1, MODINFOSTRING2, @@ -69,9 +69,9 @@ static XF86ModuleVersionInfo radeon_xorg_version = { * Xorg driver exported structures */ -_X_EXPORT DriverRec modesetting = { +_X_EXPORT DriverRec radeong = { 1, - "modesetting", + "radeong", radeon_xorg_identify, NULL, xorg_tracker_available_options, @@ -84,7 +84,7 @@ _X_EXPORT DriverRec modesetting = { static MODULESETUPPROTO(radeon_xorg_setup); -_X_EXPORT XF86ModuleData modesettingModuleData = { +_X_EXPORT XF86ModuleData radeongModuleData = { &radeon_xorg_version, radeon_xorg_setup, NULL @@ -103,7 +103,7 @@ radeon_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) */ if (!setupDone) { setupDone = 1; - xf86AddDriver(&modesetting, module, HaveDriverFuncs); + xf86AddDriver(&radeong, module, HaveDriverFuncs); /* * The return value must be non-NULL on success even though there @@ -120,7 +120,7 @@ radeon_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) static void radeon_xorg_identify(int flags) { - xf86PrintChipsets("modesetting", "Driver for Modesetting Kernel Drivers", + xf86PrintChipsets("radeong", "Driver for Radeon Gallium with KMS", radeon_xorg_chipsets); } @@ -135,8 +135,8 @@ radeon_xorg_pci_probe(DriverPtr driver, NULL, NULL, NULL, NULL, NULL); if (scrn != NULL) { scrn->driverVersion = 1; - scrn->driverName = "radeon"; - scrn->name = "modesetting"; + scrn->driverName = "radeong"; + scrn->name = "radeong"; scrn->Probe = NULL; entity = xf86GetEntityInfo(entity_num); diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c index 5995eee34ba..4f5ccea4677 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c @@ -358,6 +358,7 @@ static struct dri1_api dri1_api_hooks = { }; static struct drm_api vmw_drm_api_hooks = { + .name = "vmwgfx", .create_screen = vmw_drm_create_screen, .create_context = vmw_drm_create_context, .texture_from_shared_handle = vmw_drm_texture_from_handle, diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript index 1019f577a5f..84319f91ff1 100644 --- a/src/gallium/winsys/drm/vmware/dri/SConscript +++ b/src/gallium/winsys/drm/vmware/dri/SConscript @@ -48,7 +48,7 @@ if env['platform'] == 'linux': svgadrm, svga, mesa, - auxiliaries, + gallium, ]) # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions diff --git a/src/gallium/winsys/drm/vmware/egl_g3d/Makefile b/src/gallium/winsys/drm/vmware/egl_g3d/Makefile new file mode 100644 index 00000000000..3cf79924e08 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/egl_g3d/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +EGL_DRIVER_NAME = vmwgfx +EGL_DRIVER_SOURCES = dummy.c +EGL_DRIVER_LIBS = + +EGL_DRIVER_PIPES = \ + $(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/svga/libsvga.a + +include ../../Makefile.egl_g3d diff --git a/src/gallium/winsys/drm/vmware/egl_g3d/dummy.c b/src/gallium/winsys/drm/vmware/egl_g3d/dummy.c new file mode 100644 index 00000000000..4a1bc28b0b6 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/egl_g3d/dummy.c @@ -0,0 +1 @@ +/* mklib expects at least one object file */ diff --git a/src/gallium/winsys/drm/vmware/xorg/SConscript b/src/gallium/winsys/drm/vmware/xorg/SConscript index b8968e7137b..1e5d8ff7fed 100644 --- a/src/gallium/winsys/drm/vmware/xorg/SConscript +++ b/src/gallium/winsys/drm/vmware/xorg/SConscript @@ -38,12 +38,13 @@ if env['platform'] == 'linux': st_xorg, svgadrm, svga, - auxiliaries, + gallium, ]) sources = [ 'vmw_ioctl.c', 'vmw_screen.c', + 'vmw_video.c', 'vmw_xorg.c', ] diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 599973ce127..1d9bac3871c 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -274,7 +274,7 @@ xlib_eglTerminate(_EGLDriver *drv, _EGLDisplay *dpy) static _EGLProc -xlib_eglGetProcAddress(const char *procname) +xlib_eglGetProcAddress(_EGLDriver *drv, const char *procname) { return (_EGLProc) st_get_proc_address(procname); } @@ -751,24 +751,18 @@ xlib_eglReleaseTexImage(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, static EGLBoolean xlib_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw) { - /* error checking step: */ - if (!_eglSwapBuffers(drv, dpy, draw)) - return EGL_FALSE; - - { - struct xlib_egl_surface *xsurf = lookup_surface(draw); - struct pipe_winsys *pws = xsurf->winsys; - struct pipe_surface *psurf; + struct xlib_egl_surface *xsurf = lookup_surface(draw); + struct pipe_winsys *pws = xsurf->winsys; + struct pipe_surface *psurf; - st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT, - &psurf); + st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT, + &psurf); - st_notify_swapbuffers(xsurf->Framebuffer); + st_notify_swapbuffers(xsurf->Framebuffer); - display_surface(pws, psurf, xsurf); + display_surface(pws, psurf, xsurf); - check_and_update_buffer_size(xsurf); - } + check_and_update_buffer_size(xsurf); return EGL_TRUE; } diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile index 2997f6b79ce..3965bd949f4 100644 --- a/src/gallium/winsys/g3dvl/nouveau/Makefile +++ b/src/gallium/winsys/g3dvl/nouveau/Makefile @@ -19,11 +19,7 @@ CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \ LDFLAGS += -L${DRMDIR}/lib \ -L${DRIDIR}/lib \ -L${GALLIUMDIR}/winsys/drm/nouveau/common \ - -L${GALLIUMDIR}/auxiliary/draw \ - -L${GALLIUMDIR}/auxiliary/tgsi \ - -L${GALLIUMDIR}/auxiliary/translate \ - -L${GALLIUMDIR}/auxiliary/rtasm \ - -L${GALLIUMDIR}/auxiliary/cso_cache \ + -L${GALLIUMDIR}/auxiliary \ -L${GALLIUMDIR}/drivers/nv04 \ -L${GALLIUMDIR}/drivers/nv10 \ -L${GALLIUMDIR}/drivers/nv20 \ @@ -31,7 +27,7 @@ LDFLAGS += -L${DRMDIR}/lib \ -L${GALLIUMDIR}/drivers/nv40 \ -L${GALLIUMDIR}/drivers/nv50 -LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm +LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -lgallium -lm ############################################# diff --git a/src/gallium/winsys/g3dvl/xlib/Makefile b/src/gallium/winsys/g3dvl/xlib/Makefile index cf765ef51a5..9877660a276 100644 --- a/src/gallium/winsys/g3dvl/xlib/Makefile +++ b/src/gallium/winsys/g3dvl/xlib/Makefile @@ -25,13 +25,7 @@ SOURCES = xsp_winsys.c OBJECTS = $(SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o LIBS = $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/auxiliary/vl/libvl.a \ - $(TOP)/src/gallium/auxiliary/tgsi/libtgsi.a \ - $(TOP)/src/gallium/auxiliary/draw/libdraw.a \ - $(TOP)/src/gallium/auxiliary/translate/libtranslate.a \ - $(TOP)/src/gallium/auxiliary/cso_cache/libcso_cache.a \ - $(TOP)/src/gallium/auxiliary/rtasm/librtasm.a \ - $(TOP)/src/gallium/auxiliary/util/libutil.a + $(TOP)/src/gallium/auxiliary/libgallium.a .c.o: $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript index 74f6b2fd475..4cbc86f3311 100644 --- a/src/gallium/winsys/gdi/SConscript +++ b/src/gallium/winsys/gdi/SConscript @@ -42,8 +42,10 @@ if env['platform'] == 'windows': drivers += [trace] + env['no_import_lib'] = 1 + env.SharedLibrary( target ='opengl32', source = sources, - LIBS = wgl + glapi + mesa + drivers + auxiliaries + glsl + env['LIBS'], + LIBS = wgl + glapi + mesa + drivers + gallium + glsl + env['LIBS'], ) diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index a0293fe9b4b..9482e8f9b11 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -23,17 +23,14 @@ INCLUDE_DIRS = \ -I$(TOP)/src/gallium/auxiliary DEFINES += \ - -DGALLIUM_SOFTPIPE \ - -DGALLIUM_TRACE \ - -DGALLIUM_BRW + -DGALLIUM_SOFTPIPE #-DGALLIUM_CELL will be defined by the config */ XLIB_WINSYS_SOURCES = \ xlib.c \ xlib_cell.c \ xlib_llvmpipe.c \ - xlib_softpipe.c \ - xlib_trace.c + xlib_softpipe.c XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o) diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index dfe550f733b..a4dabb7804c 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -3,55 +3,66 @@ Import('*') -if env['platform'] == 'linux' \ - and 'mesa' in env['statetrackers'] \ - and set(('softpipe', 'llvmpipe', 'i915', 'trace')).intersection(env['drivers']) \ - and not env['dri']: - - env = env.Clone() - - env.Append(CPPPATH = [ - '#/src/mesa', - '#/src/mesa/main', - '#src/gallium/state_trackers/glx/xlib', - ]) - - env.Append(CPPDEFINES = ['USE_XSHM']) - - sources = [ - 'xlib.c', - ] - - drivers = [] - - if 'softpipe' in env['drivers']: - env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') - sources += ['xlib_softpipe.c'] - drivers += [softpipe] - - if 'llvmpipe' in env['drivers']: - env.Tool('llvm') - if 'LLVM_VERSION' in env: - env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') - env.Tool('udis86') - sources += ['xlib_llvmpipe.c'] - drivers += [llvmpipe] - - if 'cell' in env['drivers']: - env.Append(CPPDEFINES = 'GALLIUM_CELL') - sources += ['xlib_cell.c'] - drivers += [cell] - - if 'trace' in env['drivers']: - env.Append(CPPDEFINES = 'GALLIUM_TRACE') - sources += ['xlib_trace.c'] - drivers += [trace] - - # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions - libgl = env.SharedLibrary( - target ='GL', - source = sources, - LIBS = st_xlib + glapi + mesa + drivers + auxiliaries + env['LIBS'], - ) +if env['platform'] != 'linux': + Return() +if 'mesa' not in env['statetrackers']: + print 'warning: Mesa state tracker disabled: skipping build of xlib libGL.so' + Return() + +if env['dri']: + print 'warning: DRI enabled: skipping build of xlib libGL.so' + Return() + +if 'trace' not in env['drivers']: + print 'warning: trace pipe driver disabled: skipping build of xlib libGL.so' + Return() + +if not set(('softpipe', 'llvmpipe', 'trace')).intersection(env['drivers']): + print 'warning: no supported pipe driver: skipping build of xlib libGL.so' + Return() + +env = env.Clone() + +env.Append(CPPPATH = [ + '#/src/mesa', + '#/src/mesa/main', + '#src/gallium/state_trackers/glx/xlib', +]) + +env.Append(CPPDEFINES = ['USE_XSHM']) + +sources = [ + 'xlib.c', +] + +drivers = [trace] + +if 'softpipe' in env['drivers']: + env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') + sources += ['xlib_softpipe.c'] + drivers += [softpipe] + +if 'llvmpipe' in env['drivers']: + env.Tool('llvm') + if 'LLVM_VERSION' in env: + env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') + env.Tool('udis86') + sources += ['xlib_llvmpipe.c'] + drivers += [llvmpipe] + +if 'cell' in env['drivers']: + env.Append(CPPDEFINES = 'GALLIUM_CELL') + sources += ['xlib_cell.c'] + drivers += [cell] + +# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions +libgl = env.SharedLibrary( + target ='GL', + source = sources, + LIBS = st_xlib + glapi + mesa + glsl + drivers + gallium + env['LIBS'], +) + +if not env['dri']: + # Only install this libGL.so if DRI not enabled env.InstallSharedLibrary(libgl, version=(1, 5)) diff --git a/src/gallium/winsys/xlib/xlib.c b/src/gallium/winsys/xlib/xlib.c index 163cc8863cb..13117712c23 100644 --- a/src/gallium/winsys/xlib/xlib.c +++ b/src/gallium/winsys/xlib/xlib.c @@ -42,18 +42,16 @@ */ enum mode { - MODE_TRACE, MODE_CELL, MODE_LLVMPIPE, MODE_SOFTPIPE }; +/* advertise OpenGL support */ +PUBLIC const int st_api_OpenGL = 1; static enum mode get_mode() { - if (getenv("XMESA_TRACE")) - return MODE_TRACE; - #ifdef GALLIUM_CELL if (!getenv("GALLIUM_NOCELL")) return MODE_CELL; @@ -73,11 +71,6 @@ static void _init( void ) enum mode xlib_mode = get_mode(); switch (xlib_mode) { - case MODE_TRACE: -#if defined(GALLIUM_TRACE) && defined(GALLIUM_SOFTPIPE) - xmesa_set_driver( &xlib_trace_driver ); -#endif - break; case MODE_CELL: #if defined(GALLIUM_CELL) xmesa_set_driver( &xlib_cell_driver ); diff --git a/src/gallium/winsys/xlib/xlib.h b/src/gallium/winsys/xlib/xlib.h index f0855035f77..8e091d0c084 100644 --- a/src/gallium/winsys/xlib/xlib.h +++ b/src/gallium/winsys/xlib/xlib.h @@ -5,7 +5,6 @@ #include "pipe/p_compiler.h" #include "xm_winsys.h" -extern struct xm_driver xlib_trace_driver; extern struct xm_driver xlib_softpipe_driver; extern struct xm_driver xlib_llvmpipe_driver; extern struct xm_driver xlib_cell_driver; diff --git a/src/gallium/winsys/xlib/xlib_brw_context.c b/src/gallium/winsys/xlib/xlib_brw_context.c new file mode 100644 index 00000000000..fc9addd09e3 --- /dev/null +++ b/src/gallium/winsys/xlib/xlib_brw_context.c @@ -0,0 +1,209 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +/* #include "glxheader.h" */ +/* #include "xmesaP.h" */ + +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "i965simple/brw_winsys.h" +#include "xlib_brw_aub.h" +#include "xlib_brw.h" + + + + +#define XBCWS_BATCHBUFFER_SIZE 1024 + + +/* The backend to the brw driver (ie struct brw_winsys) is actually a + * per-context entity. + */ +struct xlib_brw_context_winsys { + struct brw_winsys brw_context_winsys; /**< batch buffer funcs */ + struct aub_context *aub; + + struct pipe_winsys *pipe_winsys; + + unsigned batch_data[XBCWS_BATCHBUFFER_SIZE]; + unsigned batch_nr; + unsigned batch_size; + unsigned batch_alloc; +}; + + +/* Turn a brw_winsys into an xlib_brw_context_winsys: + */ +static inline struct xlib_brw_context_winsys * +xlib_brw_context_winsys( struct brw_winsys *sws ) +{ + return (struct xlib_brw_context_winsys *)sws; +} + + +/* Simple batchbuffer interface: + */ + +static unsigned *xbcws_batch_start( struct brw_winsys *sws, + unsigned dwords, + unsigned relocs ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + if (xbcws->batch_size < xbcws->batch_nr + dwords) + return NULL; + + xbcws->batch_alloc = xbcws->batch_nr + dwords; + return (void *)1; /* not a valid pointer! */ +} + +static void xbcws_batch_dword( struct brw_winsys *sws, + unsigned dword ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + assert(xbcws->batch_nr < xbcws->batch_alloc); + xbcws->batch_data[xbcws->batch_nr++] = dword; +} + +static void xbcws_batch_reloc( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + assert(xbcws->batch_nr < xbcws->batch_alloc); + xbcws->batch_data[xbcws->batch_nr++] = + ( xlib_brw_get_buffer_offset( NULL, buf, access_flags ) + + delta ); +} + +static void xbcws_batch_end( struct brw_winsys *sws ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + assert(xbcws->batch_nr <= xbcws->batch_alloc); + xbcws->batch_alloc = 0; +} + +static void xbcws_batch_flush( struct brw_winsys *sws, + struct pipe_fence_handle **fence ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + assert(xbcws->batch_nr <= xbcws->batch_size); + + if (xbcws->batch_nr) { + xlib_brw_commands_aub( xbcws->pipe_winsys, + xbcws->batch_data, + xbcws->batch_nr ); + } + + xbcws->batch_nr = 0; +} + + + +/* Really a per-device function, just pass through: + */ +static unsigned xbcws_get_buffer_offset( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + return xlib_brw_get_buffer_offset( xbcws->pipe_winsys, + buf, + access_flags ); +} + + +/* Really a per-device function, just pass through: + */ +static void xbcws_buffer_subdata_typed( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + xlib_brw_buffer_subdata_typed( xbcws->pipe_winsys, + buf, + offset, + size, + data, + data_type ); +} + + +/** + * Create i965 hardware rendering context, but plugged into a + * dump-to-aubfile backend. + */ +struct pipe_context * +xlib_create_brw_context( struct pipe_screen *screen, + void *unused ) +{ + struct xlib_brw_context_winsys *xbcws = CALLOC_STRUCT( xlib_brw_context_winsys ); + + /* Fill in this struct with callbacks that i965simple will need to + * communicate with the window system, buffer manager, etc. + */ + xbcws->brw_context_winsys.batch_start = xbcws_batch_start; + xbcws->brw_context_winsys.batch_dword = xbcws_batch_dword; + xbcws->brw_context_winsys.batch_reloc = xbcws_batch_reloc; + xbcws->brw_context_winsys.batch_end = xbcws_batch_end; + xbcws->brw_context_winsys.batch_flush = xbcws_batch_flush; + xbcws->brw_context_winsys.buffer_subdata_typed = xbcws_buffer_subdata_typed; + xbcws->brw_context_winsys.get_buffer_offset = xbcws_get_buffer_offset; + + xbcws->pipe_winsys = screen->winsys; /* redundant */ + + xbcws->batch_size = XBCWS_BATCHBUFFER_SIZE; + + /* Create the i965simple context: + */ +#ifdef GALLIUM_CELL + return NULL; +#else + return brw_create( screen, + &xbcws->brw_context_winsys, + 0 ); +#endif +} diff --git a/src/gallium/winsys/xlib/xlib_trace.c b/src/gallium/winsys/xlib/xlib_trace.c deleted file mode 100644 index dbea655ab45..00000000000 --- a/src/gallium/winsys/xlib/xlib_trace.c +++ /dev/null @@ -1,113 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ - -/* - * Authors: - * Keith Whitwell - * Brian Paul - */ - - -#include "xlib.h" - -#include "trace/tr_screen.h" -#include "trace/tr_context.h" -#include "trace/tr_texture.h" - -#include "pipe/p_screen.h" - - - -static struct pipe_screen * -xlib_create_trace_screen( void ) -{ - struct pipe_screen *screen, *trace_screen; - - screen = xlib_softpipe_driver.create_pipe_screen(); - if (screen == NULL) - goto fail; - - /* Wrap it: - */ - trace_screen = trace_screen_create(screen); - if (trace_screen == NULL) - goto fail; - - return trace_screen; - -fail: - if (screen) - screen->destroy( screen ); - return NULL; -} - -static struct pipe_context * -xlib_create_trace_context( struct pipe_screen *_screen, - void *priv ) -{ - struct trace_screen *tr_scr = trace_screen( _screen ); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_context *pipe, *trace_pipe; - - pipe = xlib_softpipe_driver.create_pipe_context( screen, priv ); - if (pipe == NULL) - goto fail; - - /* Wrap it: - */ - trace_pipe = trace_context_create(_screen, pipe); - if (trace_pipe == NULL) - goto fail; - - trace_pipe->priv = priv; - - return trace_pipe; - -fail: - if (pipe) - pipe->destroy( pipe ); - return NULL; -} - -static void -xlib_trace_display_surface( struct xmesa_buffer *buffer, - struct pipe_surface *_surf ) -{ - struct trace_surface *tr_surf = trace_surface( _surf ); - struct pipe_surface *surf = tr_surf->surface; - - xlib_softpipe_driver.display_surface( buffer, surf ); -} - - -struct xm_driver xlib_trace_driver = -{ - .create_pipe_screen = xlib_create_trace_screen, - .create_pipe_context = xlib_create_trace_context, - .display_surface = xlib_trace_display_surface, -}; |