diff options
author | Michal Krol <[email protected]> | 2010-01-05 11:04:50 +0100 |
---|---|---|
committer | Michal Krol <[email protected]> | 2010-01-05 11:04:50 +0100 |
commit | 9b21b3c52a8a7d58d08151d1a6bf25c472dec213 (patch) | |
tree | d9083b6af4e2e9b70a7fa6cd31bac45a36e0f6b6 /src/gallium | |
parent | 543b9566bdaa48fea2df1866fa1310c1cdbcde27 (diff) | |
parent | 1f9aa38f4e2be47229d92be2c1189c2b8d9c7133 (diff) |
Merge branch 'master' into instanced-arrays
Conflicts:
src/gallium/auxiliary/tgsi/tgsi_dump.c
src/gallium/include/pipe/p_shader_tokens.h
Diffstat (limited to 'src/gallium')
245 files changed, 5480 insertions, 3577 deletions
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 63983c52201..136423513c6 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -54,13 +54,13 @@ install: ##### RULES ##### .c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ .cpp.o: - $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CXX) -c $(INCLUDES) $(DEFINES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ .S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ sinclude depend diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 8be84cddbe7..eea32b1314b 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -2,29 +2,7 @@ import os Import('*') -env = env.Clone() - -auxiliaries = [] - -Export('auxiliaries') - - -if llvm: - SConscript(['auxiliary/gallivm/SConscript']) - -SConscript([ - # NOTE: order matters! - 'auxiliary/util/SConscript', - 'auxiliary/rtasm/SConscript', - 'auxiliary/tgsi/SConscript', - 'auxiliary/cso_cache/SConscript', - 'auxiliary/translate/SConscript', - 'auxiliary/draw/SConscript', - 'auxiliary/pipebuffer/SConscript', - 'auxiliary/indices/SConscript', - 'auxiliary/rbug/SConscript', - 'auxiliary/vl/SConscript', -]) +SConscript('auxiliary/SConscript') for driver in env['drivers']: SConscript(os.path.join('drivers', driver, 'SConscript')) diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 5446eb68a98..e3af41c6e04 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -1,12 +1,177 @@ -# src/gallium/auxiliary/Makefile TOP = ../../.. 
include $(TOP)/configs/current -SUBDIRS = $(GALLIUM_AUXILIARY_DIRS) +LIBNAME = gallium -default install clean: - @for dir in $(SUBDIRS) ; do \ - if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE) $@) || exit 1; \ - fi \ - done +C_SOURCES = \ + cso_cache/cso_context.c \ + cso_cache/cso_cache.c \ + cso_cache/cso_hash.c \ + draw/draw_context.c \ + draw/draw_gs.c \ + draw/draw_pipe.c \ + draw/draw_pipe_aaline.c \ + draw/draw_pipe_aapoint.c \ + draw/draw_pipe_clip.c \ + draw/draw_pipe_cull.c \ + draw/draw_pipe_flatshade.c \ + draw/draw_pipe_offset.c \ + draw/draw_pipe_pstipple.c \ + draw/draw_pipe_stipple.c \ + draw/draw_pipe_twoside.c \ + draw/draw_pipe_unfilled.c \ + draw/draw_pipe_util.c \ + draw/draw_pipe_validate.c \ + draw/draw_pipe_vbuf.c \ + draw/draw_pipe_wide_line.c \ + draw/draw_pipe_wide_point.c \ + draw/draw_pt.c \ + draw/draw_pt_elts.c \ + draw/draw_pt_emit.c \ + draw/draw_pt_fetch.c \ + draw/draw_pt_fetch_emit.c \ + draw/draw_pt_fetch_shade_emit.c \ + draw/draw_pt_fetch_shade_pipeline.c \ + draw/draw_pt_post_vs.c \ + draw/draw_pt_util.c \ + draw/draw_pt_varray.c \ + draw/draw_pt_vcache.c \ + draw/draw_vertex.c \ + draw/draw_vs.c \ + draw/draw_vs_varient.c \ + draw/draw_vs_aos.c \ + draw/draw_vs_aos_io.c \ + draw/draw_vs_aos_machine.c \ + draw/draw_vs_exec.c \ + draw/draw_vs_llvm.c \ + draw/draw_vs_ppc.c \ + draw/draw_vs_sse.c \ + indices/u_indices_gen.c \ + indices/u_unfilled_gen.c \ + pipebuffer/pb_buffer_fenced.c \ + pipebuffer/pb_buffer_malloc.c \ + pipebuffer/pb_bufmgr_alt.c \ + pipebuffer/pb_bufmgr_cache.c \ + pipebuffer/pb_bufmgr_debug.c \ + pipebuffer/pb_bufmgr_fenced.c \ + pipebuffer/pb_bufmgr_mm.c \ + pipebuffer/pb_bufmgr_ondemand.c \ + pipebuffer/pb_bufmgr_pool.c \ + pipebuffer/pb_bufmgr_slab.c \ + pipebuffer/pb_validate.c \ + rbug/rbug_connection.c \ + rbug/rbug_core.c \ + rbug/rbug_texture.c \ + rbug/rbug_context.c \ + rbug/rbug_shader.c \ + rbug/rbug_demarshal.c \ + rtasm/rtasm_cpu.c \ + rtasm/rtasm_execmem.c \ + rtasm/rtasm_x86sse.c \ + 
rtasm/rtasm_ppc.c \ + rtasm/rtasm_ppc_spe.c \ + tgsi/tgsi_sanity.c \ + tgsi/tgsi_build.c \ + tgsi/tgsi_dump.c \ + tgsi/tgsi_exec.c \ + tgsi/tgsi_info.c \ + tgsi/tgsi_iterate.c \ + tgsi/tgsi_parse.c \ + tgsi/tgsi_ppc.c \ + tgsi/tgsi_scan.c \ + tgsi/tgsi_sse2.c \ + tgsi/tgsi_text.c \ + tgsi/tgsi_transform.c \ + tgsi/tgsi_ureg.c \ + tgsi/tgsi_util.c \ + translate/translate_generic.c \ + translate/translate_sse.c \ + translate/translate.c \ + translate/translate_cache.c \ + util/u_debug.c \ + util/u_debug_dump.c \ + util/u_debug_symbol.c \ + util/u_debug_stack.c \ + util/u_blit.c \ + util/u_blitter.c \ + util/u_cache.c \ + util/u_cpu_detect.c \ + util/u_dl.c \ + util/u_draw_quad.c \ + util/u_format.c \ + util/u_format_access.c \ + util/u_format_table.c \ + util/u_gen_mipmap.c \ + util/u_handle_table.c \ + util/u_hash_table.c \ + util/u_hash.c \ + util/u_keymap.c \ + util/u_linear.c \ + util/u_network.c \ + util/u_math.c \ + util/u_mm.c \ + util/u_rect.c \ + util/u_simple_shaders.c \ + util/u_snprintf.c \ + util/u_stream_stdc.c \ + util/u_stream_wd.c \ + util/u_surface.c \ + util/u_texture.c \ + util/u_tile.c \ + util/u_time.c \ + util/u_timed_winsys.c \ + util/u_upload_mgr.c \ + util/u_simple_screen.c \ + vl/vl_bitstream_parser.c \ + vl/vl_mpeg12_mc_renderer.c \ + vl/vl_compositor.c \ + vl/vl_csc.c \ + vl/vl_shader_build.c + +GALLIVM_SOURCES = \ + gallivm/gallivm.cpp \ + gallivm/gallivm_cpu.cpp \ + gallivm/instructions.cpp \ + gallivm/loweringpass.cpp \ + gallivm/tgsitollvm.cpp \ + gallivm/storage.cpp \ + gallivm/storagesoa.cpp \ + gallivm/instructionssoa.cpp + +INC_SOURCES = \ + gallivm/gallivm_builtins.cpp \ + gallivm/gallivmsoabuiltins.cpp + +# XXX: gallivm doesn't build correctly so disable for now +#ifeq ($(MESA_LLVM),1) +#DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +#CPP_SOURCES += \ +# $(GALLIVM_SOURCES) +#endif + + +include ../Makefile.template + + +gallivm/gallivm_builtins.cpp: gallivm/llvm_builtins.c + clang --emit-llvm < $< |llvm-as|opt 
-std-compile-opts > temp1.bin + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ + rm temp1.bin + +gallivm/gallivmsoabuiltins.cpp: gallivm/soabuiltins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ + rm temp2.bin + + +indices/u_indices_gen.c: indices/u_indices_gen.py + python $< > $@ + +indices/u_unfilled_gen.c: indices/u_unfilled_gen.py + python $< > $@ + +util/u_format_table.c: util/u_format_table.py util/u_format_parse.py util/u_format.csv + python util/u_format_table.py util/u_format.csv > $@ + +util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv + python util/u_format_access.py util/u_format.csv > $@ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript new file mode 100644 index 00000000000..782eb533863 --- /dev/null +++ b/src/gallium/auxiliary/SConscript @@ -0,0 +1,185 @@ +Import('*') + +from sys import executable as python_cmd + +env.Append(CPPPATH = [ + 'indices', + 'util', +]) + +env.CodeGenerate( + target = 'indices/u_indices_gen.c', + script = 'indices/u_indices_gen.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'indices/u_unfilled_gen.c', + script = 'indices/u_unfilled_gen.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'util/u_format_table.c', + script = 'util/u_format_table.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + +env.CodeGenerate( + target = 'util/u_format_access.c', + script = 'util/u_format_access.py', + source = ['util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + +source = [ + 
'cso_cache/cso_context.c', + 'cso_cache/cso_cache.c', + 'cso_cache/cso_hash.c', + 'draw/draw_context.c', + 'draw/draw_pipe.c', + 'draw/draw_pipe_aaline.c', + 'draw/draw_pipe_aapoint.c', + 'draw/draw_pipe_clip.c', + 'draw/draw_pipe_cull.c', + 'draw/draw_pipe_flatshade.c', + 'draw/draw_pipe_offset.c', + 'draw/draw_pipe_pstipple.c', + 'draw/draw_pipe_stipple.c', + 'draw/draw_pipe_twoside.c', + 'draw/draw_pipe_unfilled.c', + 'draw/draw_pipe_util.c', + 'draw/draw_pipe_validate.c', + 'draw/draw_pipe_vbuf.c', + 'draw/draw_pipe_wide_line.c', + 'draw/draw_pipe_wide_point.c', + 'draw/draw_pt.c', + 'draw/draw_pt_elts.c', + 'draw/draw_pt_emit.c', + 'draw/draw_pt_fetch.c', + 'draw/draw_pt_fetch_emit.c', + 'draw/draw_pt_fetch_shade_emit.c', + 'draw/draw_pt_fetch_shade_pipeline.c', + 'draw/draw_pt_post_vs.c', + 'draw/draw_pt_util.c', + 'draw/draw_pt_varray.c', + 'draw/draw_pt_vcache.c', + 'draw/draw_vertex.c', + 'draw/draw_vs.c', + 'draw/draw_vs_aos.c', + 'draw/draw_vs_aos_io.c', + 'draw/draw_vs_aos_machine.c', + 'draw/draw_vs_exec.c', + 'draw/draw_vs_llvm.c', + 'draw/draw_vs_ppc.c', + 'draw/draw_vs_sse.c', + 'draw/draw_vs_varient.c', + 'draw/draw_gs.c', + #'indices/u_indices.c', + #'indices/u_unfilled_indices.c', + 'indices/u_indices_gen.c', + 'indices/u_unfilled_gen.c', + 'pipebuffer/pb_buffer_fenced.c', + 'pipebuffer/pb_buffer_malloc.c', + 'pipebuffer/pb_bufmgr_alt.c', + 'pipebuffer/pb_bufmgr_cache.c', + 'pipebuffer/pb_bufmgr_debug.c', + 'pipebuffer/pb_bufmgr_fenced.c', + 'pipebuffer/pb_bufmgr_mm.c', + 'pipebuffer/pb_bufmgr_ondemand.c', + 'pipebuffer/pb_bufmgr_pool.c', + 'pipebuffer/pb_bufmgr_slab.c', + 'pipebuffer/pb_validate.c', + 'rbug/rbug_core.c', + 'rbug/rbug_shader.c', + 'rbug/rbug_context.c', + 'rbug/rbug_texture.c', + 'rbug/rbug_demarshal.c', + 'rbug/rbug_connection.c', + 'rtasm/rtasm_cpu.c', + 'rtasm/rtasm_execmem.c', + 'rtasm/rtasm_x86sse.c', + 'rtasm/rtasm_ppc.c', + 'rtasm/rtasm_ppc_spe.c', + 'tgsi/tgsi_build.c', + 'tgsi/tgsi_dump.c', + 'tgsi/tgsi_dump_c.c', + 
'tgsi/tgsi_exec.c', + 'tgsi/tgsi_info.c', + 'tgsi/tgsi_iterate.c', + 'tgsi/tgsi_parse.c', + 'tgsi/tgsi_sanity.c', + 'tgsi/tgsi_scan.c', + 'tgsi/tgsi_ppc.c', + 'tgsi/tgsi_sse2.c', + 'tgsi/tgsi_text.c', + 'tgsi/tgsi_transform.c', + 'tgsi/tgsi_ureg.c', + 'tgsi/tgsi_util.c', + 'translate/translate_generic.c', + 'translate/translate_sse.c', + 'translate/translate.c', + 'translate/translate_cache.c', + 'util/u_bitmask.c', + 'util/u_blit.c', + 'util/u_blitter.c', + 'util/u_cache.c', + 'util/u_cpu_detect.c', + 'util/u_debug.c', + 'util/u_debug_dump.c', + 'util/u_debug_memory.c', + 'util/u_debug_stack.c', + 'util/u_debug_symbol.c', + 'util/u_dl.c', + 'util/u_draw_quad.c', + 'util/u_format.c', + 'util/u_format_access.c', + 'util/u_format_table.c', + 'util/u_gen_mipmap.c', + 'util/u_handle_table.c', + 'util/u_hash.c', + 'util/u_hash_table.c', + 'util/u_keymap.c', + 'util/u_network.c', + 'util/u_math.c', + 'util/u_mm.c', + 'util/u_rect.c', + 'util/u_simple_shaders.c', + 'util/u_snprintf.c', + 'util/u_stream_stdc.c', + 'util/u_stream_wd.c', + 'util/u_surface.c', + 'util/u_texture.c', + 'util/u_tile.c', + 'util/u_time.c', + 'util/u_timed_winsys.c', + 'util/u_upload_mgr.c', + 'util/u_simple_screen.c', + 'vl/vl_bitstream_parser.c', + 'vl/vl_mpeg12_mc_renderer.c', + 'vl/vl_compositor.c', + 'vl/vl_csc.c', + 'vl/vl_shader_build.c', +] + +if env['llvm']: + source += [ + 'gallivm/gallivm.cpp', + 'gallivm/gallivm_cpu.cpp', + 'gallivm/instructions.cpp', + 'gallivm/loweringpass.cpp', + 'gallivm/tgsitollvm.cpp', + 'gallivm/storage.cpp', + 'gallivm/storagesoa.cpp', + 'gallivm/instructionssoa.cpp', + ] + +gallium = env.ConvenienceLibrary( + target = 'gallium', + source = source, +) + +Export('gallium') diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile deleted file mode 100644 index 8726afcd949..00000000000 --- a/src/gallium/auxiliary/cso_cache/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = cso_cache - -C_SOURCES = \ - cso_context.c \ - cso_cache.c \ - cso_hash.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript deleted file mode 100644 index 651e68a191a..00000000000 --- a/src/gallium/auxiliary/cso_cache/SConscript +++ /dev/null @@ -1,11 +0,0 @@ -Import('*') - -cso_cache = env.ConvenienceLibrary( - target = 'cso_cache', - source = [ - 'cso_context.c', - 'cso_cache.c', - 'cso_hash.c', - ]) - -auxiliaries.insert(0, cso_cache) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 80bd0c91db0..2b16332e143 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -42,6 +42,7 @@ #include "cso_cache/cso_context.h" #include "cso_cache/cso_cache.h" #include "cso_cache/cso_hash.h" +#include "cso_context.h" struct cso_context { struct pipe_context *pipe; @@ -85,8 +86,8 @@ struct cso_context { void *blend, *blend_saved; void *depth_stencil, *depth_stencil_saved; void *rasterizer, *rasterizer_saved; - void *fragment_shader, *fragment_shader_saved; - void *vertex_shader, *vertex_shader_saved; + void *fragment_shader, *fragment_shader_saved, *geometry_shader; + void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved; struct pipe_framebuffer_state fb, fb_saved; struct pipe_viewport_state vp, vp_saved; @@ -1027,3 +1028,38 @@ enum pipe_error cso_set_blend_color(struct cso_context *ctx, } return PIPE_OK; } + +enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx, + void *handle) +{ + if (ctx->geometry_shader != handle) { + ctx->geometry_shader = handle; + ctx->pipe->bind_gs_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_delete_geometry_shader(struct cso_context *ctx, void *handle) +{ + if (handle == ctx->geometry_shader) { + /* unbind before deleting */ + 
ctx->pipe->bind_gs_state(ctx->pipe, NULL); + ctx->geometry_shader = NULL; + } + ctx->pipe->delete_gs_state(ctx->pipe, handle); +} + +void cso_save_geometry_shader(struct cso_context *ctx) +{ + assert(!ctx->geometry_shader_saved); + ctx->geometry_shader_saved = ctx->geometry_shader; +} + +void cso_restore_geometry_shader(struct cso_context *ctx) +{ + if (ctx->geometry_shader_saved != ctx->geometry_shader) { + ctx->pipe->bind_gs_state(ctx->pipe, ctx->geometry_shader_saved); + ctx->geometry_shader = ctx->geometry_shader_saved; + } + ctx->geometry_shader_saved = NULL; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index e5b92177cfd..b9e313e32d6 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -146,6 +146,13 @@ void cso_save_vertex_shader(struct cso_context *cso); void cso_restore_vertex_shader(struct cso_context *cso); +enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx, + void *handle); +void cso_delete_geometry_shader(struct cso_context *ctx, void *handle); +void cso_save_geometry_shader(struct cso_context *cso); +void cso_restore_geometry_shader(struct cso_context *cso); + + enum pipe_error cso_set_framebuffer(struct cso_context *cso, const struct pipe_framebuffer_state *fb); diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile deleted file mode 100644 index 5041dcc072b..00000000000 --- a/src/gallium/auxiliary/draw/Makefile +++ /dev/null @@ -1,46 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = draw - -C_SOURCES = \ - draw_context.c \ - draw_pipe.c \ - draw_pipe_aaline.c \ - draw_pipe_aapoint.c \ - draw_pipe_clip.c \ - draw_pipe_cull.c \ - draw_pipe_flatshade.c \ - draw_pipe_offset.c \ - draw_pipe_pstipple.c \ - draw_pipe_stipple.c \ - draw_pipe_twoside.c \ - draw_pipe_unfilled.c \ - draw_pipe_util.c \ - draw_pipe_validate.c \ - draw_pipe_vbuf.c \ - draw_pipe_wide_line.c \ - draw_pipe_wide_point.c \ - draw_pt.c \ - draw_pt_elts.c \ - draw_pt_emit.c \ - draw_pt_fetch.c \ - draw_pt_fetch_emit.c \ - draw_pt_fetch_shade_emit.c \ - draw_pt_fetch_shade_pipeline.c \ - draw_pt_post_vs.c \ - draw_pt_util.c \ - draw_pt_varray.c \ - draw_pt_vcache.c \ - draw_vertex.c \ - draw_vs.c \ - draw_vs_varient.c \ - draw_vs_aos.c \ - draw_vs_aos_io.c \ - draw_vs_aos_machine.c \ - draw_vs_exec.c \ - draw_vs_llvm.c \ - draw_vs_ppc.c \ - draw_vs_sse.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript deleted file mode 100644 index 5f05aa324a5..00000000000 --- a/src/gallium/auxiliary/draw/SConscript +++ /dev/null @@ -1,46 +0,0 @@ -Import('*') - -draw = env.ConvenienceLibrary( - target = 'draw', - source = [ - 'draw_context.c', - 'draw_pipe.c', - 'draw_pipe_aaline.c', - 'draw_pipe_aapoint.c', - 'draw_pipe_clip.c', - 'draw_pipe_cull.c', - 'draw_pipe_flatshade.c', - 'draw_pipe_offset.c', - 'draw_pipe_pstipple.c', - 'draw_pipe_stipple.c', - 'draw_pipe_twoside.c', - 'draw_pipe_unfilled.c', - 'draw_pipe_util.c', - 'draw_pipe_validate.c', - 'draw_pipe_vbuf.c', - 'draw_pipe_wide_line.c', - 'draw_pipe_wide_point.c', - 'draw_pt.c', - 'draw_pt_elts.c', - 'draw_pt_emit.c', - 'draw_pt_fetch.c', - 'draw_pt_fetch_emit.c', - 'draw_pt_fetch_shade_emit.c', - 'draw_pt_fetch_shade_pipeline.c', - 'draw_pt_post_vs.c', - 'draw_pt_util.c', - 'draw_pt_varray.c', - 'draw_pt_vcache.c', - 'draw_vertex.c', - 'draw_vs.c', - 'draw_vs_aos.c', - 'draw_vs_aos_io.c', - 'draw_vs_aos_machine.c', - 
'draw_vs_exec.c', - 'draw_vs_llvm.c', - 'draw_vs_ppc.c', - 'draw_vs_sse.c', - 'draw_vs_varient.c' - ]) - -auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index cc5f7f01059..667aa46b208 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -36,6 +36,7 @@ #include "draw_context.h" #include "draw_vbuf.h" #include "draw_vs.h" +#include "draw_gs.h" #include "draw_pt.h" #include "draw_pipe.h" @@ -67,6 +68,9 @@ struct draw_context *draw_create( void ) if (!draw_vs_init( draw )) goto fail; + if (!draw_gs_init( draw )) + goto fail; + return draw; fail: @@ -231,11 +235,19 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw, void draw_set_mapped_constant_buffer(struct draw_context *draw, - const void *buffer, + unsigned shader_type, + const void *buffer, unsigned size ) { - draw->pt.user.constants = buffer; - draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); + debug_assert(shader_type == PIPE_SHADER_VERTEX || + shader_type == PIPE_SHADER_GEOMETRY); + if (shader_type == PIPE_SHADER_VERTEX) { + draw->pt.user.vs_constants = buffer; + draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); + } else if (shader_type == PIPE_SHADER_GEOMETRY) { + draw->pt.user.gs_constants = buffer; + draw_gs_set_constants( draw, (const float (*)[4])buffer, size ); + } } @@ -298,7 +310,7 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable ) * a post-transformed vertex. * * With this function, drivers that use the draw module should have no reason - * to track the current vertex shader. + * to track the current vertex/geometry shader. * * Note that the draw module may sometimes generate vertices with extra * attributes (such as texcoords for AA lines). The driver can call this @@ -309,43 +321,59 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable ) * work for the drivers. 
*/ int -draw_find_vs_output(const struct draw_context *draw, - uint semantic_name, uint semantic_index) +draw_find_shader_output(const struct draw_context *draw, + uint semantic_name, uint semantic_index) { const struct draw_vertex_shader *vs = draw->vs.vertex_shader; + const struct draw_geometry_shader *gs = draw->gs.geometry_shader; uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == semantic_name && - vs->info.output_semantic_index[i] == semantic_index) + const struct tgsi_shader_info *info = &vs->info; + + if (gs) + info = &gs->info; + + for (i = 0; i < info->num_outputs; i++) { + if (info->output_semantic_name[i] == semantic_name && + info->output_semantic_index[i] == semantic_index) return i; } /* XXX there may be more than one extra vertex attrib. * For example, simulated gl_FragCoord and gl_PointCoord. */ - if (draw->extra_vp_outputs.semantic_name == semantic_name && - draw->extra_vp_outputs.semantic_index == semantic_index) { - return draw->extra_vp_outputs.slot; + if (draw->extra_shader_outputs.semantic_name == semantic_name && + draw->extra_shader_outputs.semantic_index == semantic_index) { + return draw->extra_shader_outputs.slot; } + return 0; } /** - * Return number of vertex shader outputs. + * Return number of the shader outputs. + * + * If geometry shader is present, its output will be returned, + * if not vertex shader is used. */ uint -draw_num_vs_outputs(const struct draw_context *draw) +draw_num_shader_outputs(const struct draw_context *draw) { uint count = draw->vs.vertex_shader->info.num_outputs; - if (draw->extra_vp_outputs.slot > 0) + + /* if geometry shader is present, its outputs go to te + * driver, not the vertex shaders */ + if (draw->gs.geometry_shader) + count = draw->gs.geometry_shader->info.num_outputs; + + if (draw->extra_shader_outputs.slot > 0) count++; return count; } /** - * Provide TGSI sampler objects for vertex shaders that use texture fetches. 
+ * Provide TGSI sampler objects for vertex/geometry shaders that use texture fetches. * This might only be used by software drivers for the time being. */ void @@ -355,6 +383,8 @@ draw_texture_samplers(struct draw_context *draw, { draw->vs.num_samplers = num_samplers; draw->vs.samplers = samplers; + draw->gs.num_samplers = num_samplers; + draw->gs.samplers = samplers; } @@ -421,3 +451,18 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) draw->flushing = FALSE; } } + + +int draw_current_shader_outputs(struct draw_context *draw) +{ + if (draw->gs.geometry_shader) + return draw->gs.num_gs_outputs; + return draw->vs.num_vs_outputs; +} + +int draw_current_shader_position_output(struct draw_context *draw) +{ + if (draw->gs.geometry_shader) + return draw->gs.position_output; + return draw->vs.position_output; +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c0f6a614115..8a64c06efcd 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -45,6 +45,7 @@ struct pipe_context; struct draw_context; struct draw_stage; struct draw_vertex_shader; +struct draw_geometry_shader; struct tgsi_sampler; @@ -85,11 +86,11 @@ draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe int -draw_find_vs_output(const struct draw_context *draw, - uint semantic_name, uint semantic_index); +draw_find_shader_output(const struct draw_context *draw, + uint semantic_name, uint semantic_index); uint -draw_num_vs_outputs(const struct draw_context *draw); +draw_num_shader_outputs(const struct draw_context *draw); void @@ -112,6 +113,17 @@ void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs); +/* + * Geometry shader functions + */ +struct draw_geometry_shader * +draw_create_geometry_shader(struct draw_context *draw, + const struct pipe_shader_state *shader); +void draw_bind_geometry_shader(struct draw_context *draw, + 
struct draw_geometry_shader *dvs); +void draw_delete_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dvs); + /* * Vertex data functions @@ -140,6 +152,7 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); void draw_set_mapped_constant_buffer(struct draw_context *draw, + unsigned shader_type, const void *buffer, unsigned size ); diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c new file mode 100644 index 00000000000..5db2e755423 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -0,0 +1,338 @@ +/************************************************************************** + * + * Copyright 2009 VMWare Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "draw_gs.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_exec.h" + +#include "pipe/p_shader_tokens.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#define MAX_PRIM_VERTICES 6 +/* fixme: move it from here */ +#define MAX_PRIMITIVES 64 + +boolean +draw_gs_init( struct draw_context *draw ) +{ + draw->gs.machine = tgsi_exec_machine_create(); + if (!draw->gs.machine) + return FALSE; + + draw->gs.machine->Primitives = align_malloc( + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); + if (!draw->gs.machine->Primitives) + return FALSE; + memset(draw->gs.machine->Primitives, 0, + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); + + return TRUE; +} + + +void draw_gs_set_constants( struct draw_context *draw, + const float (*constants)[4], + unsigned size ) +{ +} + + +struct draw_geometry_shader * +draw_create_geometry_shader(struct draw_context *draw, + const struct pipe_shader_state *state) +{ + struct draw_geometry_shader *gs; + int i; + + gs = CALLOC_STRUCT(draw_geometry_shader); + + if (!gs) + return NULL; + + gs->state = *state; + gs->state.tokens = tgsi_dup_tokens(state->tokens); + if (!gs->state.tokens) { + FREE(gs); + return NULL; + } + + tgsi_scan_shader(state->tokens, &gs->info); + + /* setup the defaults */ + gs->input_primitive = PIPE_PRIM_TRIANGLES; + gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP; + gs->max_output_vertices = 32; + + for (i = 0; i < gs->info.num_properties; ++i) { + if (gs->info.properties[i].name == + TGSI_PROPERTY_GS_INPUT_PRIM) + gs->input_primitive = gs->info.properties[i].data[0]; + else if (gs->info.properties[i].name == + TGSI_PROPERTY_GS_OUTPUT_PRIM) + gs->output_primitive = gs->info.properties[i].data[0]; + else if (gs->info.properties[i].name == + TGSI_PROPERTY_GS_MAX_VERTICES) + gs->max_output_vertices = gs->info.properties[i].data[0]; + } + + gs->machine = 
draw->gs.machine; + + if (gs) + { + uint i; + for (i = 0; i < gs->info.num_outputs; i++) { + if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && + gs->info.output_semantic_index[i] == 0) + gs->position_output = i; + } + } + + return gs; +} + +void draw_bind_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dgs) +{ + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); + + if (dgs) { + draw->gs.geometry_shader = dgs; + draw->gs.num_gs_outputs = dgs->info.num_outputs; + draw->gs.position_output = dgs->position_output; + draw_geometry_shader_prepare(dgs, draw); + } + else { + draw->gs.geometry_shader = NULL; + draw->gs.num_gs_outputs = 0; + } +} + +void draw_delete_geometry_shader(struct draw_context *draw, + struct draw_geometry_shader *dgs) +{ + FREE(dgs); +} + +static INLINE int num_vertices_for_prim(int prim) +{ + switch(prim) { + case PIPE_PRIM_POINTS: + return 1; + case PIPE_PRIM_LINES: + return 2; + case PIPE_PRIM_LINE_LOOP: + return 2; + case PIPE_PRIM_LINE_STRIP: + return 2; + case PIPE_PRIM_TRIANGLES: + return 3; + case PIPE_PRIM_TRIANGLE_STRIP: + return 3; + case PIPE_PRIM_TRIANGLE_FAN: + return 3; + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return 4; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return 6; + default: + assert(!"Bad geometry shader input"); + return 0; + } +} + +static void draw_fetch_geometry_input(struct draw_geometry_shader *shader, + int start_primitive, + int num_primitives, + const float (*input_ptr)[4], + unsigned input_vertex_stride, + unsigned inputs_from_vs) +{ + struct tgsi_exec_machine *machine = shader->machine; + unsigned slot, vs_slot, k, j; + unsigned num_vertices = num_vertices_for_prim(shader->input_primitive); + int idx = 0; + + for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; slot++) { + /*debug_printf("Slot = %d (semantic = %d)\n", slot, + shader->info.input_semantic_name[slot]);*/ + if 
(shader->info.input_semantic_name[slot] == + TGSI_SEMANTIC_PRIMID) { + for (j = 0; j < num_primitives; ++j) { + machine->Inputs[idx].xyzw[0].f[j] = (float)start_primitive + j; + machine->Inputs[idx].xyzw[1].f[j] = (float)start_primitive + j; + machine->Inputs[idx].xyzw[2].f[j] = (float)start_primitive + j; + machine->Inputs[idx].xyzw[3].f[j] = (float)start_primitive + j; + } + ++idx; + } else { + for (j = 0; j < num_primitives; ++j) { + int vidx = idx; + const float (*prim_ptr)[4]; + /*debug_printf(" %d) Prim (num_verts = %d)\n", start_primitive + j, + num_vertices);*/ + prim_ptr = (const float (*)[4])( + (const char *)input_ptr + + (j * num_vertices * input_vertex_stride)); + + for (k = 0; k < num_vertices; ++k, ++vidx) { + const float (*input)[4]; + input = (const float (*)[4])( + (const char *)prim_ptr + (k * input_vertex_stride)); + vidx = k * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; + /*debug_printf("\t%d)(%d) Input vert:\n", vidx, k);*/ +#if 1 + assert(!util_is_inf_or_nan(input[vs_slot][0])); + assert(!util_is_inf_or_nan(input[vs_slot][1])); + assert(!util_is_inf_or_nan(input[vs_slot][2])); + assert(!util_is_inf_or_nan(input[vs_slot][3])); +#endif + machine->Inputs[vidx].xyzw[0].f[j] = input[vs_slot][0]; + machine->Inputs[vidx].xyzw[1].f[j] = input[vs_slot][1]; + machine->Inputs[vidx].xyzw[2].f[j] = input[vs_slot][2]; + machine->Inputs[vidx].xyzw[3].f[j] = input[vs_slot][3]; +#if 0 + debug_printf("\t\t%d %f %f %f %f\n", slot, + machine->Inputs[vidx].xyzw[0].f[j], + machine->Inputs[vidx].xyzw[1].f[j], + machine->Inputs[vidx].xyzw[2].f[j], + machine->Inputs[vidx].xyzw[3].f[j]); +#endif + } + } + ++vs_slot; + idx += num_vertices; + } + } +} + +static INLINE void +draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, + int num_primitives, + float (*output)[4], + unsigned vertex_size) +{ + struct tgsi_exec_machine *machine = shader->machine; + unsigned prim_idx, j, slot; + + /* Unswizzle all output results. 
+ */ + /* FIXME: handle all the primitives produced by the gs, not just + * the first one + unsigned prim_count = + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];*/ + for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { + unsigned num_verts_per_prim = machine->Primitives[0]; + for (j = 0; j < num_verts_per_prim; j++) { + int idx = (prim_idx * num_verts_per_prim + j) * + shader->info.num_outputs; +#ifdef DEBUG_OUTPUTS + debug_printf("%d) Output vert:\n", idx); +#endif + for (slot = 0; slot < shader->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[prim_idx]; + output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[prim_idx]; + output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[prim_idx]; + output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[prim_idx]; +#ifdef DEBUG_OUTPUTS + debug_printf("\t%d: %f %f %f %f\n", slot, + output[slot][0], + output[slot][1], + output[slot][2], + output[slot][3]); +#endif + debug_assert(!util_is_inf_or_nan(output[slot][0])); + } + output = (float (*)[4])((char *)output + vertex_size); + } + } +} + +void draw_geometry_shader_run(struct draw_geometry_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned vertex_size) +{ + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + unsigned num_vertices = num_vertices_for_prim(shader->input_primitive); + unsigned num_primitives = count/num_vertices; + unsigned inputs_from_vs = 0; + + machine->Consts = constants; + + for (i = 0; i < shader->info.num_inputs; ++i) { + if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID) + ++inputs_from_vs; + } + + for (i = 0; i < num_primitives; ++i) { + unsigned int max_primitives = 1; + + draw_fetch_geometry_input(shader, i, max_primitives, input, + input_stride, inputs_from_vs); + + tgsi_set_exec_mask(machine, + 1, + max_primitives > 1, + max_primitives > 2, + 
max_primitives > 3); + + /* run interpreter */ + tgsi_exec_machine_run(machine); + + draw_geometry_fetch_outputs(shader, max_primitives, + output, vertex_size); + } +} + +void draw_geometry_shader_delete(struct draw_geometry_shader *shader) +{ + FREE((void*) shader->state.tokens); + FREE(shader); +} + +void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, + struct draw_context *draw) +{ + if (shader->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(shader->machine, + shader->state.tokens, + draw->gs.num_samplers, + draw->gs.samplers); + } +} diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h new file mode 100644 index 00000000000..d6a97d9c4ef --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2009 VMWare Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DRAW_GS_H +#define DRAW_GS_H + +#include "draw_context.h" +#include "draw_private.h" + + +#define MAX_TGSI_PRIMITIVES 4 + +struct draw_context; + +/** + * Private version of the compiled geometry shader + */ +struct draw_geometry_shader { + struct draw_context *draw; + + struct tgsi_exec_machine *machine; + + /* This member will disappear shortly:*/ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + unsigned position_output; + + unsigned max_output_vertices; + unsigned input_primitive; + unsigned output_primitive; + + /* Extracted from shader: + */ + const float (*immediates)[4]; +}; + +void draw_geometry_shader_run(struct draw_geometry_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride); + +void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, + struct draw_context *draw); + +void draw_geometry_shader_delete(struct draw_geometry_shader *shader); + + +#endif diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 14375426ed8..4585dcdb48a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -660,13 +660,13 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) } /* update vertex attrib info */ - aaline->tex_slot = draw->vs.num_vs_outputs; - aaline->pos_slot = draw->vs.position_output; + aaline->tex_slot = draw_current_shader_outputs(draw); + aaline->pos_slot = draw_current_shader_position_output(draw);; /* advertise 
the extra post-transformed vertex attribute */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib; - draw->extra_vp_outputs.slot = aaline->tex_slot; + draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib; + draw->extra_shader_outputs.slot = aaline->tex_slot; /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ @@ -707,7 +707,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags) aaline->state.texture); draw->suspend_flushing = FALSE; - draw->extra_vp_outputs.slot = 0; + draw->extra_shader_outputs.slot = 0; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 75130a8fb0e..d86717e5182 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -687,14 +687,14 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) bind_aapoint_fragment_shader(aapoint); /* update vertex attrib info */ - aapoint->tex_slot = draw->vs.num_vs_outputs; + aapoint->tex_slot = draw_current_shader_outputs(draw); assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ - aapoint->pos_slot = draw->vs.position_output; + aapoint->pos_slot = draw_current_shader_position_output(draw); - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib; - draw->extra_vp_outputs.slot = aapoint->tex_slot; + draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib; + draw->extra_shader_outputs.slot = aapoint->tex_slot; /* find psize slot in post-transform vertex */ aapoint->psize_slot = -1; @@ -731,7 +731,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags) aapoint->driver_bind_fs_state(pipe, 
aapoint->fs->driver_fs); draw->suspend_flushing = FALSE; - draw->extra_vp_outputs.slot = 0; + draw->extra_shader_outputs.slot = 0; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 0670268a196..205cda5eabe 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -114,8 +114,8 @@ static void interp( const struct clipper *clip, const struct vertex_header *out, const struct vertex_header *in ) { - const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs; - const unsigned pos_attr = clip->stage.draw->vs.position_output; + const unsigned nr_attrs = draw_current_shader_outputs(clip->stage.draw); + const unsigned pos_attr = draw_current_shader_position_output(clip->stage.draw); unsigned j; /* Vertex header. diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 0a70483858c..11b39db5990 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -55,7 +55,7 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) static void cull_tri( struct draw_stage *stage, struct prim_header *header ) { - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); /* Window coords: */ const float *v0 = header->v[0]->data[pos]; diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 40798a5d6e7..e829492423e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -63,7 +63,7 @@ static INLINE struct offset_stage *offset_stage( struct draw_stage *stage ) static void do_offset_tri( struct draw_stage *stage, struct prim_header *header ) { - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); struct 
offset_stage *offset = offset_stage(stage); float inv_det = 1.0f / header->det; diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 6e921bac278..70fbab9ea76 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -73,7 +73,8 @@ screen_interp( struct draw_context *draw, const struct vertex_header *v1 ) { uint attr; - for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) { + int num_outputs = draw_current_shader_outputs(draw); + for (attr = 0; attr < num_outputs; attr++) { const float *val0 = v0->data[attr]; const float *val1 = v1->data[attr]; float *newv = dst->data[attr]; @@ -121,7 +122,7 @@ stipple_line(struct draw_stage *stage, struct prim_header *header) struct stipple_stage *stipple = stipple_stage(stage); struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); const float *pos0 = v0->data[pos]; const float *pos1 = v1->data[pos]; float start = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index f32cbef983d..3073c870825 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -59,7 +59,7 @@ static void wideline_line( struct draw_stage *stage, struct prim_header *header ) { /*const struct wideline_stage *wide = wideline_stage(stage);*/ - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); const float half_width = 0.5f * stage->draw->rasterizer->line_width; struct prim_header tri; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 7d76a7dbf39..8dc50c0ab43 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c 
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -112,7 +112,7 @@ static void set_texcoords(const struct widepoint_stage *wide, if (wide->point_coord_fs_input >= 0) { /* put gl_PointCoord into the extra vertex slot */ - uint slot = wide->stage.draw->extra_vp_outputs.slot; + uint slot = wide->stage.draw->extra_shader_outputs.slot; v->data[slot][0] = tc[0]; v->data[slot][1] = tc[1]; v->data[slot][2] = 0.0F; @@ -130,7 +130,7 @@ static void widepoint_point( struct draw_stage *stage, struct prim_header *header ) { const struct widepoint_stage *wide = widepoint_stage(stage); - const unsigned pos = stage->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(stage->draw); const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; float half_size; float left_adj, right_adj, bot_adj, top_adj; @@ -257,13 +257,13 @@ static void widepoint_first_point( struct draw_stage *stage, wide->point_coord_fs_input = find_pntc_input_attrib(draw); /* setup extra vp output (point coord implemented as a texcoord) */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = 0; - draw->extra_vp_outputs.slot = draw->vs.num_vs_outputs; + draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_shader_outputs.semantic_index = 0; + draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw); } else { wide->point_coord_fs_input = -1; - draw->extra_vp_outputs.slot = 0; + draw->extra_shader_outputs.slot = 0; } wide->psize_slot = -1; @@ -287,7 +287,7 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags ) { stage->point = widepoint_first_point; stage->next->flush( stage->next, flags ); - stage->draw->extra_vp_outputs.slot = 0; + stage->draw->extra_shader_outputs.slot = 0; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 129d919a846..2a11b8c5459 100644 --- 
a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -152,8 +152,9 @@ struct draw_context /** vertex arrays */ const void *vbuffer[PIPE_MAX_ATTRIBS]; - /** constant buffer (for vertex shader) */ - const void *constants; + /** constant buffer (for vertex/geometry shader) */ + const void *vs_constants; + const void *gs_constants; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ @@ -211,6 +212,18 @@ struct draw_context struct translate_cache *emit_cache; } vs; + struct { + struct draw_geometry_shader *geometry_shader; + uint num_gs_outputs; /**< convenience, from geometry_shader */ + uint position_output; + + /** TGSI program interpreter runtime state */ + struct tgsi_exec_machine *machine; + + uint num_samplers; + struct tgsi_sampler **samplers; + } gs; + /* Clip derived state: */ float plane[12][4]; @@ -222,7 +235,7 @@ struct draw_context uint semantic_name; uint semantic_index; int slot; - } extra_vp_outputs; + } extra_shader_outputs; unsigned reduced_prim; @@ -247,6 +260,19 @@ void draw_vs_set_constants( struct draw_context *, +/******************************************************************************* + * Geometry shading code: + */ +boolean draw_gs_init( struct draw_context *draw ); +void draw_gs_set_constants( struct draw_context *, + const float (*constants)[4], + unsigned size ); + +/******************************************************************************* + * Common shading code: + */ +int draw_current_shader_outputs(struct draw_context *draw); +int draw_current_shader_position_output(struct draw_context *draw); /******************************************************************************* * Vertex processing (was passthrough) code: diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 0238f2e2348..279f4eec635 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -32,6 +32,7 @@ #include "draw/draw_vertex.h" #include "draw/draw_pt.h" #include "draw/draw_vs.h" +#include "draw/draw_gs.h" #include "translate/translate.h" @@ -131,7 +132,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( fetch_count, 4 ); @@ -159,13 +161,21 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, */ if (opt & PT_SHADE) { - shader->run_linear(shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); + vshader->run_linear(vshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.vs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + if (gshader) + draw_geometry_shader_run(gshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.gs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, @@ -208,6 +218,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( count, 4 ); @@ -238,10 +249,19 @@ static void 
fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.constants, + (const float (*)[4])draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); + + if (geometry_shader) + draw_geometry_shader_run(geometry_shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.gs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, @@ -282,6 +302,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( count, 4 ); @@ -308,10 +329,19 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.constants, + (const float (*)[4])draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); + + if (geometry_shader) + draw_geometry_shader_run(geometry_shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.gs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 9dfb47837e0..55151823a14 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -100,7 +100,7 @@ static boolean post_vs_cliptest_viewport_gl( 
struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; - const unsigned pos = pvs->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned clipped = 0; unsigned j; @@ -190,7 +190,7 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; - const unsigned pos = pvs->draw->vs.position_output; + const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned j; if (0) debug_printf("%s\n", __FUNCTION__); diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index b61fa291436..17c3b8cec26 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -50,16 +50,32 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) *first = 2; *incr = 1; break; + case PIPE_PRIM_LINES_ADJACENCY: + *first = 4; + *incr = 2; + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + *first = 4; + *incr = 1; + break; case PIPE_PRIM_TRIANGLES: *first = 3; *incr = 3; break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + *first = 6; + *incr = 3; + break; case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_TRIANGLE_FAN: case PIPE_PRIM_POLYGON: *first = 3; *incr = 1; break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + *first = 6; + *incr = 1; + break; case PIPE_PRIM_QUADS: *first = 4; *incr = 4; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index 010c7a18a7c..f0aec5febab 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -36,6 +36,10 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_QUADS: case 
PIPE_PRIM_QUAD_STRIP: + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: for (j = 0; j < count;) { unsigned remaining = count - j; unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr ); diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 60b7a3ea361..9f40030f39f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -148,11 +148,12 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); + /* FIXME: geometry shading? */ if (vsvg->base.key.clip) { /* not really handling clipping, just do the rhw so we can @@ -210,7 +211,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile deleted file mode 100644 index 5a96d94ec37..00000000000 --- a/src/gallium/auxiliary/gallivm/Makefile +++ /dev/null @@ -1,92 +0,0 @@ -# -*-makefile-*- -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = gallivm - - -GALLIVM_SOURCES = \ - gallivm.cpp \ - gallivm_cpu.cpp \ - instructions.cpp \ - loweringpass.cpp \ - tgsitollvm.cpp \ - storage.cpp \ - storagesoa.cpp \ - instructionssoa.cpp - -INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp - -CPP_SOURCES = \ - $(GALLIVM_SOURCES) - -C_SOURCES = -ASM_SOURCES = - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(CPP_SOURCES:.cpp=.o) \ - $(ASM_SOURCES:.S=.o) - -### Include directories -INCLUDES = \ - -I. \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.cpp.o: - $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -##### TARGETS ##### - -default:: depend symlinks $(LIBNAME) - - -$(LIBNAME): $(OBJECTS) Makefile - $(TOP)/bin/mklib -o $@ -static $(OBJECTS) - - -depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ - $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null - - -gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin - (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp1.bin - -gallivmsoabuiltins.cpp: soabuiltins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin - (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ - rm temp2.bin - -# Emacs tags -tags: - etags `find . 
-name \*.[ch]` `find ../include` - - -# Remove .o and backup files -clean: - -rm -f *.o */*.o *~ *.so *~ server/*.o - -rm -f depend depend.bak - -rm -f gallivm_builtins.cpp - -rm -f gallivmsoabuiltins.cpp - -symlinks: - - -include depend diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript deleted file mode 100644 index c0aa51b90a9..00000000000 --- a/src/gallium/auxiliary/gallivm/SConscript +++ /dev/null @@ -1,16 +0,0 @@ -Import('*') - -gallivm = env.ConvenienceLibrary( - target = 'gallivm', - source = [ - 'gallivm.cpp', - 'gallivm_cpu.cpp', - 'instructions.cpp', - 'loweringpass.cpp', - 'tgsitollvm.cpp', - 'storage.cpp', - 'storagesoa.cpp', - 'instructionssoa.cpp', - ]) - -auxiliaries.insert(0, gallivm) diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 5cafe8c3f0c..8f7d3b71004 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -552,7 +552,7 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_SHL: break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: break; case TGSI_OPCODE_AND: break; @@ -919,7 +919,7 @@ translate_instructionir(llvm::Module *module, break; case TGSI_OPCODE_SHL: break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: break; case TGSI_OPCODE_AND: break; diff --git a/src/gallium/auxiliary/indices/Makefile b/src/gallium/auxiliary/indices/Makefile deleted file mode 100644 index f2ebc3f410a..00000000000 --- a/src/gallium/auxiliary/indices/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = indices - -C_SOURCES = \ - u_indices_gen.c \ - u_unfilled_gen.c - -include ../../Makefile.template - -u_indices_gen.c: u_indices_gen.py - python $< > $@ - -u_unfilled_gen.c: u_unfilled_gen.py - python $< > $@ diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript deleted file mode 100644 index 712e215534f..00000000000 --- a/src/gallium/auxiliary/indices/SConscript +++ /dev/null @@ -1,28 +0,0 @@ -Import('*') - -from sys import executable as python_cmd - -env.CodeGenerate( - target = 'u_indices_gen.c', - script = 'u_indices_gen.py', - source = [], - command = python_cmd + ' $SCRIPT > $TARGET' -) - -env.CodeGenerate( - target = 'u_unfilled_gen.c', - script = 'u_unfilled_gen.py', - source = [], - command = python_cmd + ' $SCRIPT > $TARGET' -) - -indices = env.ConvenienceLibrary( - target = 'indices', - source = [ -# 'u_indices.c', -# 'u_unfilled_indices.c', - 'u_indices_gen.c', - 'u_unfilled_gen.c', - ]) - -auxiliaries.insert(0, indices) diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile deleted file mode 100644 index 1c00ba8d986..00000000000 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = pipebuffer - -C_SOURCES = \ - pb_buffer_fenced.c \ - pb_buffer_malloc.c \ - pb_bufmgr_alt.c \ - pb_bufmgr_cache.c \ - pb_bufmgr_debug.c \ - pb_bufmgr_fenced.c \ - pb_bufmgr_mm.c \ - pb_bufmgr_ondemand.c \ - pb_bufmgr_pool.c \ - pb_bufmgr_slab.c \ - pb_validate.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript deleted file mode 100644 index 8e9f06abe45..00000000000 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ /dev/null @@ -1,19 +0,0 @@ -Import('*') - -pipebuffer = env.ConvenienceLibrary( - target = 'pipebuffer', - source = [ - 'pb_buffer_fenced.c', - 'pb_buffer_malloc.c', - 'pb_bufmgr_alt.c', - 'pb_bufmgr_cache.c', - 'pb_bufmgr_debug.c', - 'pb_bufmgr_fenced.c', - 'pb_bufmgr_mm.c', - 'pb_bufmgr_ondemand.c', - 'pb_bufmgr_pool.c', - 'pb_bufmgr_slab.c', - 'pb_validate.c', - ]) - -auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/rbug/Makefile b/src/gallium/auxiliary/rbug/Makefile deleted file mode 100644 index cd12e8468fc..00000000000 --- a/src/gallium/auxiliary/rbug/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = rbug - -C_SOURCES = \ - rbug_connection.c \ - rbug_core.c \ - rbug_texture.c \ - rbug_context.c \ - rbug_shader.c \ - rbug_demarshal.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/rbug/SConscript b/src/gallium/auxiliary/rbug/SConscript deleted file mode 100644 index 4a9afb45d3c..00000000000 --- a/src/gallium/auxiliary/rbug/SConscript +++ /dev/null @@ -1,14 +0,0 @@ -Import('*') - -rbug = env.ConvenienceLibrary( - target = 'rbug', - source = [ - 'rbug_core.c', - 'rbug_shader.c', - 'rbug_context.c', - 'rbug_texture.c', - 'rbug_demarshal.c', - 'rbug_connection.c', - ]) - -auxiliaries.insert(0, rbug) diff --git a/src/gallium/auxiliary/rbug/rbug_context.h b/src/gallium/auxiliary/rbug/rbug_context.h index da61c2365b0..03126d6b123 100644 --- a/src/gallium/auxiliary/rbug/rbug_context.h +++ b/src/gallium/auxiliary/rbug/rbug_context.h @@ -46,7 +46,7 @@ typedef enum RBUG_BLOCK_BEFORE = 1, RBUG_BLOCK_AFTER = 2, RBUG_BLOCK_RULE = 4, - RBUG_BLOCK_MASK = 7, + RBUG_BLOCK_MASK = 7 } rbug_block_t; struct rbug_proto_context_list diff --git a/src/gallium/auxiliary/rbug/rbug_proto.h b/src/gallium/auxiliary/rbug/rbug_proto.h index d273be0166d..4f3eb75dc4d 100644 --- a/src/gallium/auxiliary/rbug/rbug_proto.h +++ b/src/gallium/auxiliary/rbug/rbug_proto.h @@ -65,7 +65,7 @@ enum rbug_opcode RBUG_OP_SHADER_DISABLE = 770, RBUG_OP_SHADER_REPLACE = 771, RBUG_OP_SHADER_LIST_REPLY = -768, - RBUG_OP_SHADER_INFO_REPLY = -769, + RBUG_OP_SHADER_INFO_REPLY = -769 }; /** diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile deleted file mode 100644 index ab8ea464c6e..00000000000 --- a/src/gallium/auxiliary/rtasm/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = rtasm - -C_SOURCES = \ - rtasm_cpu.c \ - rtasm_execmem.c \ - rtasm_x86sse.c \ - rtasm_ppc.c \ - rtasm_ppc_spe.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript deleted file mode 100644 index eb48368accb..00000000000 --- a/src/gallium/auxiliary/rtasm/SConscript +++ /dev/null @@ -1,13 +0,0 @@ -Import('*') - -rtasm = env.ConvenienceLibrary( - target = 'rtasm', - source = [ - 'rtasm_cpu.c', - 'rtasm_execmem.c', - 'rtasm_x86sse.c', - 'rtasm_ppc.c', - 'rtasm_ppc_spe.c', - ]) - -auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 01811d50114..ffed768f979 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -41,6 +41,12 @@ #define MAP_ANONYMOUS MAP_ANON #endif +#if defined(PIPE_OS_WINDOWS) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include <windows.h> +#endif #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) @@ -118,7 +124,29 @@ rtasm_exec_free(void *addr) } -#else /* PIPE_OS_LINUX || PIPE_OS_BSD || PIPE_OS_SOLARIS */ +#elif defined(PIPE_OS_WINDOWS) + + +/* + * Avoid Data Execution Prevention. + */ + +void * +rtasm_exec_malloc(size_t size) +{ + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); +} + + +void +rtasm_exec_free(void *addr) +{ + VirtualFree(addr, 0, MEM_RELEASE); +} + + +#else + /* * Just use regular memory. @@ -138,4 +166,4 @@ rtasm_exec_free(void *addr) } -#endif /* PIPE_OS_LINUX || PIPE_OS_BSD || PIPE_OS_SOLARIS */ +#endif diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile deleted file mode 100644 index a7d111b6891..00000000000 --- a/src/gallium/auxiliary/sct/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = sct - -C_SOURCES = \ - sct.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/sct/SConscript b/src/gallium/auxiliary/sct/SConscript deleted file mode 100644 index 76927d973f8..00000000000 --- a/src/gallium/auxiliary/sct/SConscript +++ /dev/null @@ -1,9 +0,0 @@ -Import('*') - -sct = env.ConvenienceLibrary( - target = 'sct', - source = [ - 'sct.c' - ]) - -auxiliaries.insert(0, sct) diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c deleted file mode 100644 index 722d2b7e66e..00000000000 --- a/src/gallium/auxiliary/sct/sct.c +++ /dev/null @@ -1,453 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "pipe/p_state.h" -#include "sct.h" - - -struct texture_list -{ - struct pipe_texture *texture; - struct texture_list *next; -}; - - - -#define MAX_SURFACES ((PIPE_MAX_COLOR_BUFS) + 1) - -struct sct_context -{ - const struct pipe_context *context; - - /** surfaces the context is drawing into */ - struct pipe_surface *surfaces[MAX_SURFACES]; - - /** currently bound textures */ - struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; - - /** previously bound textures, used but not flushed */ - struct texture_list *textures_used; - - boolean needs_flush; - - struct sct_context *next; -}; - - - -struct sct_surface -{ - const struct pipe_surface *surface; - - /** list of contexts drawing to this surface */ - struct sct_context_list *contexts; - - struct sct_surface *next; -}; - - - -/** - * Find the surface_info for the given pipe_surface - */ -static struct sct_surface * -find_surface_info(struct surface_context_tracker *sct, - const struct pipe_surface *surface) -{ - struct sct_surface *si; - for (si = sct->surfaces; si; si = si->next) - if (si->surface == surface) - return si; - return NULL; -} - - -/** - * As above, but create new surface_info if surface is new. - */ -static struct sct_surface * -find_create_surface_info(struct surface_context_tracker *sct, - const struct pipe_surface *surface) -{ - struct sct_surface *si = find_surface_info(sct, surface); - if (si) - return si; - - /* alloc new */ - si = CALLOC_STRUCT(sct_surface); - if (si) { - si->surface = surface; - - /* insert at head */ - si->next = sct->surfaces; - sct->surfaces = si; - } - - return si; -} - - -/** - * Find a context_info for the given context. 
- */ -static struct sct_context * -find_context_info(struct surface_context_tracker *sct, - const struct pipe_context *context) -{ - struct sct_context *ci; - for (ci = sct->contexts; ci; ci = ci->next) - if (ci->context == context) - return ci; - return NULL; -} - - -/** - * As above, but create new context_info if context is new. - */ -static struct sct_context * -find_create_context_info(struct surface_context_tracker *sct, - const struct pipe_context *context) -{ - struct sct_context *ci = find_context_info(sct, context); - if (ci) - return ci; - - /* alloc new */ - ci = CALLOC_STRUCT(sct_context); - if (ci) { - ci->context = context; - - /* insert at head */ - ci->next = sct->contexts; - sct->contexts = ci; - } - - return ci; -} - - -/** - * Is the context already bound to the surface? - */ -static boolean -find_surface_context(const struct sct_surface *si, - const struct pipe_context *context) -{ - const struct sct_context_list *cl; - for (cl = si->contexts; cl; cl = cl->next) { - if (cl->context == context) { - return TRUE; - } - } - return FALSE; -} - - -/** - * Add a context to the list of contexts associated with a surface. - */ -static void -add_context_to_surface(struct sct_surface *si, - const struct pipe_context *context) -{ - struct sct_context_list *cl = CALLOC_STRUCT(sct_context_list); - if (cl) { - cl->context = context; - /* insert at head of list of contexts */ - cl->next = si->contexts; - si->contexts = cl; - } -} - - -/** - * Remove a context from the list of contexts associated with a surface. 
- */ -static void -remove_context_from_surface(struct sct_surface *si, - const struct pipe_context *context) -{ - struct sct_context_list *prev = NULL, *curr, *next; - - for (curr = si->contexts; curr; curr = next) { - if (curr->context == context) { - /* remove */ - if (prev) - prev->next = curr->next; - else - si->contexts = curr->next; - next = curr->next; - FREE(curr); - } - else { - prev = curr; - next = curr->next; - } - } -} - - -/** - * Unbind context from surface. - */ -static void -unbind_context_surface(struct surface_context_tracker *sct, - struct pipe_context *context, - struct pipe_surface *surface) -{ - struct sct_surface *si = find_surface_info(sct, surface); - if (si) { - remove_context_from_surface(si, context); - } -} - - -/** - * Bind context to a set of surfaces (color + Z). - * Like MakeCurrent(). - */ -void -sct_bind_surfaces(struct surface_context_tracker *sct, - struct pipe_context *context, - uint num_surf, - struct pipe_surface **surfaces) -{ - struct sct_context *ci = find_create_context_info(sct, context); - uint i; - - if (!ci) { - return; /* out of memory */ - } - - /* unbind currently bound surfaces */ - for (i = 0; i < MAX_SURFACES; i++) { - if (ci->surfaces[i]) { - unbind_context_surface(sct, context, ci->surfaces[i]); - } - } - - /* bind new surfaces */ - for (i = 0; i < num_surf; i++) { - struct sct_surface *si = find_create_surface_info(sct, surfaces[i]); - if (!find_surface_context(si, context)) { - add_context_to_surface(si, context); - } - } -} - - -/** - * Return list of contexts bound to a surface. 
- */ -const struct sct_context_list * -sct_get_surface_contexts(struct surface_context_tracker *sct, - const struct pipe_surface *surface) -{ - const struct sct_surface *si = find_surface_info(sct, surface); - return si->contexts; -} - - - -static boolean -find_texture(const struct sct_context *ci, - const struct pipe_texture *texture) -{ - const struct texture_list *tl; - - for (tl = ci->textures_used; tl; tl = tl->next) { - if (tl->texture == texture) { - return TRUE; - } - } - return FALSE; -} - - -/** - * Add the given texture to the context's list of used textures. - */ -static void -add_texture_used(struct sct_context *ci, - struct pipe_texture *texture) -{ - if (!find_texture(ci, texture)) { - /* add to list */ - struct texture_list *tl = CALLOC_STRUCT(texture_list); - if (tl) { - pipe_texture_reference(&tl->texture, texture); - /* insert at head */ - tl->next = ci->textures_used; - ci->textures_used = tl; - } - } -} - - -/** - * Bind a texture to a rendering context. - */ -void -sct_bind_texture(struct surface_context_tracker *sct, - struct pipe_context *context, - uint unit, - struct pipe_texture *tex) -{ - struct sct_context *ci = find_context_info(sct, context); - - if (ci->textures[unit] != tex) { - /* put texture on the 'used' list */ - add_texture_used(ci, tex); - /* bind new */ - pipe_texture_reference(&ci->textures[unit], tex); - } -} - - -/** - * Check if the given texture has been used by the rendering context - * since the last call to sct_flush_textures(). - */ -boolean -sct_is_texture_used(struct surface_context_tracker *sct, - const struct pipe_context *context, - const struct pipe_texture *texture) -{ - const struct sct_context *ci = find_context_info(sct, context); - return find_texture(ci, texture); -} - - -/** - * To be called when the image contents of a texture are changed, such - * as for gl[Copy]TexSubImage(). 
- * XXX this may not be needed - */ -void -sct_update_texture(struct pipe_texture *tex) -{ - -} - - -/** - * When a scene is flushed/rendered we can release the list of - * used textures. - */ -void -sct_flush_textures(struct surface_context_tracker *sct, - struct pipe_context *context) -{ - struct sct_context *ci = find_context_info(sct, context); - struct texture_list *tl, *next; - uint i; - - for (tl = ci->textures_used; tl; tl = next) { - next = tl->next; - pipe_texture_reference(&tl->texture, NULL); - FREE(tl); - } - ci->textures_used = NULL; - - /* put the currently bound textures on the 'used' list */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - add_texture_used(ci, ci->textures[i]); - } -} - - - -void -sct_destroy_context(struct surface_context_tracker *sct, - struct pipe_context *context) -{ - /* XXX should we require an unbinding first? */ - { - struct sct_surface *si; - for (si = sct->surfaces; si; si = si->next) { - remove_context_from_surface(si, context); - } - } - - /* remove context from context_info list */ - { - struct sct_context *ci, *next, *prev = NULL; - for (ci = sct->contexts; ci; ci = next) { - next = ci->next; - if (ci->context == context) { - if (prev) - prev->next = ci->next; - else - sct->contexts = ci->next; - FREE(ci); - } - else { - prev = ci; - } - } - } - -} - - -void -sct_destroy_surface(struct surface_context_tracker *sct, - struct pipe_surface *surface) -{ - if (1) { - /* debug/sanity: no context should be bound to surface */ - struct sct_context *ci; - uint i; - for (ci = sct->contexts; ci; ci = ci->next) { - for (i = 0; i < MAX_SURFACES; i++) { - assert(ci->surfaces[i] != surface); - } - } - } - - /* remove surface from sct_surface list */ - { - struct sct_surface *si, *next, *prev = NULL; - for (si = sct->surfaces; si; si = next) { - next = si->next; - if (si->surface == surface) { - /* unlink */ - if (prev) - prev->next = si->next; - else - sct->surfaces = si->next; - FREE(si); - } - else { - prev = si; - } - } - } -} diff 
--git a/src/gallium/auxiliary/sct/sct.h b/src/gallium/auxiliary/sct/sct.h deleted file mode 100644 index cf7c4d3bdfd..00000000000 --- a/src/gallium/auxiliary/sct/sct.h +++ /dev/null @@ -1,123 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Surface/Context Tracking - * - * For some drivers, we need to monitor the binding between contexts and - * surfaces/textures. - * This code may evolve quite a bit... 
- */ - - -#ifndef SCT_H -#define SCT_H - - -#ifdef __cplusplus -extern "C" { -#endif - - -struct pipe_context; -struct pipe_surface; - -struct sct_context; -struct sct_surface; - - -/** - * Per-device info, basically - */ -struct surface_context_tracker -{ - struct sct_context *contexts; - struct sct_surface *surfaces; -}; - - - -/** - * Simple linked list of contexts - */ -struct sct_context_list -{ - const struct pipe_context *context; - struct sct_context_list *next; -}; - - - -extern void -sct_bind_surfaces(struct surface_context_tracker *sct, - struct pipe_context *context, - uint num_surf, - struct pipe_surface **surfaces); - - -extern void -sct_bind_texture(struct surface_context_tracker *sct, - struct pipe_context *context, - uint unit, - struct pipe_texture *texture); - - -extern void -sct_update_texture(struct pipe_texture *tex); - - -extern boolean -sct_is_texture_used(struct surface_context_tracker *sct, - const struct pipe_context *context, - const struct pipe_texture *texture); - -extern void -sct_flush_textures(struct surface_context_tracker *sct, - struct pipe_context *context); - - -extern const struct sct_context_list * -sct_get_surface_contexts(struct surface_context_tracker *sct, - const struct pipe_surface *surf); - - -extern void -sct_destroy_context(struct surface_context_tracker *sct, - struct pipe_context *context); - - -extern void -sct_destroy_surface(struct surface_context_tracker *sct, - struct pipe_surface *surface); - - - -#ifdef __cplusplus -} -#endif - -#endif /* SCT_H */ diff --git a/src/gallium/auxiliary/sct/usage.c b/src/gallium/auxiliary/sct/usage.c deleted file mode 100644 index 6227f199628..00000000000 --- a/src/gallium/auxiliary/sct/usage.c +++ /dev/null @@ -1,61 +0,0 @@ -/* surface / context tracking */ - - -/* - -context A: - render to texture T - -context B: - texture from T - ------------------------ - -flush surface: - which contexts are bound to the surface? 
- ------------------------ - -glTexSubImage(): - which contexts need to be flushed? - - */ - - -/* - -in MakeCurrent(): - - call sct_bind_surfaces(context, list of surfaces) to update the - dependencies between context and surfaces - - -in SurfaceFlush(), or whatever it is in D3D: - - call sct_get_surface_contexts(surface) to get a list of contexts - which are currently bound to the surface. - - - -in BindTexture(): - - call sct_bind_texture(context, texture) to indicate that the texture - is used in the scene. - - -in glTexSubImage() or RenderToTexture(): - - call sct_is_texture_used(context, texture) to determine if the texture - has been used in the scene, but the scene's not flushed. If TRUE is - returned it means the scene has to be rendered/flushed before the contents - of the texture can be changed. - - -in psb_scene_flush/terminate(): - - call sct_flush_textures(context) to tell the SCT that the textures which - were used in the scene can be released. - - - -*/ diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile deleted file mode 100644 index 5f0a580b096..00000000000 --- a/src/gallium/auxiliary/tgsi/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = tgsi - -C_SOURCES = \ - tgsi_sanity.c \ - tgsi_build.c \ - tgsi_dump.c \ - tgsi_exec.c \ - tgsi_info.c \ - tgsi_iterate.c \ - tgsi_parse.c \ - tgsi_ppc.c \ - tgsi_scan.c \ - tgsi_sse2.c \ - tgsi_text.c \ - tgsi_transform.c \ - tgsi_ureg.c \ - tgsi_util.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript deleted file mode 100644 index b6bc2924f06..00000000000 --- a/src/gallium/auxiliary/tgsi/SConscript +++ /dev/null @@ -1,23 +0,0 @@ -Import('*') - -tgsi = env.ConvenienceLibrary( - target = 'tgsi', - source = [ - 'tgsi_build.c', - 'tgsi_dump.c', - 'tgsi_dump_c.c', - 'tgsi_exec.c', - 'tgsi_info.c', - 'tgsi_iterate.c', - 'tgsi_parse.c', - 'tgsi_sanity.c', - 'tgsi_scan.c', - 'tgsi_ppc.c', - 'tgsi_sse2.c', - 'tgsi_text.c', - 'tgsi_transform.c', - 'tgsi_ureg.c', - 'tgsi_util.c', - ]) - -auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 92903fe57f3..de9cbc86305 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -1025,7 +1025,7 @@ tgsi_build_full_property( size++; *property = tgsi_build_property( - TGSI_PROPERTY_GS_INPUT_PRIM, + full_prop->Property.PropertyName, header ); assert( full_prop->Property.NrTokens <= 8 + 1 ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 4391ca75d1d..d7ff262f30a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -123,12 +123,15 @@ static const char *semantic_names[] = "NORMAL", "FACE", "EDGEFLAG", + "PRIM_ID", "INSTANCEID" }; static const char *immediate_type_names[] = { - "FLT32" + "FLT32", + "UINT32", + "INT32" }; static const char *swizzle_names[] = @@ -175,13 +178,19 @@ static const char *primitive_names[] = static void -_dump_register( +_dump_register_decl( struct dump_ctx *ctx, uint file, int 
first, int last ) { ENM( file, file_names ); + + /* all geometry shader inputs are two dimensional */ + if (file == TGSI_FILE_INPUT && + ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) + TXT("[]"); + CHR( '[' ); SID( first ); if (first != last) { @@ -192,6 +201,52 @@ _dump_register( } static void +_dump_register_dst( + struct dump_ctx *ctx, + uint file, + int index) +{ + ENM( file, file_names ); + + CHR( '[' ); + SID( index ); + CHR( ']' ); +} + + +static void +_dump_register_src( + struct dump_ctx *ctx, + const struct tgsi_full_src_register *src ) +{ + if (src->Register.Indirect) { + ENM( src->Register.File, file_names ); + CHR( '[' ); + ENM( src->Indirect.File, file_names ); + CHR( '[' ); + SID( src->Indirect.Index ); + TXT( "]." ); + ENM( src->Indirect.SwizzleX, swizzle_names ); + if (src->Register.Index != 0) { + if (src->Register.Index > 0) + CHR( '+' ); + SID( src->Register.Index ); + } + CHR( ']' ); + } else { + ENM( src->Register.File, file_names ); + CHR( '[' ); + SID( src->Register.Index ); + CHR( ']' ); + } + if (src->Register.Dimension) { + CHR( '[' ); + SID( src->Dimension.Index ); + CHR( ']' ); + } +} + +static void _dump_register_ind( struct dump_ctx *ctx, uint file, @@ -245,7 +300,7 @@ iter_declaration( TXT( "DCL " ); - _dump_register( + _dump_register_decl( ctx, decl->Declaration.File, decl->Range.First, @@ -360,6 +415,12 @@ iter_immediate( case TGSI_IMM_FLOAT32: FLT( imm->u[i].Float ); break; + case TGSI_IMM_UINT32: + UID(imm->u[i].Uint); + break; + case TGSI_IMM_INT32: + SID(imm->u[i].Int); + break; default: assert( 0 ); } @@ -436,10 +497,9 @@ iter_instruction( dst->Indirect.SwizzleX ); } else { - _dump_register( + _dump_register_dst( ctx, dst->Register.File, - dst->Register.Index, dst->Register.Index ); } _dump_writemask( ctx, dst->Register.WriteMask ); @@ -455,26 +515,11 @@ iter_instruction( CHR( ' ' ); if (src->Register.Negate) - TXT( "-(" ); + CHR( '-' ); if (src->Register.Absolute) CHR( '|' ); - if (src->Register.Indirect) { - 
_dump_register_ind( - ctx, - src->Register.File, - src->Register.Index, - src->Indirect.File, - src->Indirect.Index, - src->Indirect.SwizzleX ); - } - else { - _dump_register( - ctx, - src->Register.File, - src->Register.Index, - src->Register.Index ); - } + _dump_register_src(ctx, src); if (src->Register.SwizzleX != TGSI_SWIZZLE_X || src->Register.SwizzleY != TGSI_SWIZZLE_Y || @@ -489,8 +534,6 @@ iter_instruction( if (src->Register.Absolute) CHR( '|' ); - if (src->Register.Negate) - CHR( ')' ); first_reg = FALSE; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 123117cb0a3..f43233bdb49 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -60,6 +61,7 @@ #include "util/u_memory.h" #include "util/u_math.h" + #define FAST_MATH 1 #define TILE_TOP_LEFT 0 @@ -67,11 +69,329 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +static void +micro_abs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = fabsf(src->f[0]); + dst->f[1] = fabsf(src->f[1]); + dst->f[2] = fabsf(src->f[2]); + dst->f[3] = fabsf(src->f[3]); +} + +static void +micro_arl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0]); + dst->i[1] = (int)floorf(src->f[1]); + dst->i[2] = (int)floorf(src->f[2]); + dst->i[3] = (int)floorf(src->f[3]); +} + +static void +micro_arr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0] + 0.5f); + dst->i[1] = (int)floorf(src->f[1] + 0.5f); + dst->i[2] = (int)floorf(src->f[2] + 0.5f); + dst->i[3] = (int)floorf(src->f[3] + 0.5f); +} + 
+static void +micro_ceil(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = ceilf(src->f[0]); + dst->f[1] = ceilf(src->f[1]); + dst->f[2] = ceilf(src->f[2]); + dst->f[3] = ceilf(src->f[3]); +} + +static void +micro_cos(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = cosf(src->f[0]); + dst->f[1] = cosf(src->f[1]); + dst->f[2] = cosf(src->f[2]); + dst->f[3] = cosf(src->f[3]); +} + +static void +micro_ddx(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; +} + +static void +micro_exp2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_exp2(src->f[0]); + dst->f[1] = util_fast_exp2(src->f[1]); + dst->f[2] = util_fast_exp2(src->f[2]); + dst->f[3] = util_fast_exp2(src->f[3]); +#else +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. 
*/ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif /* DEBUG */ + + dst->f[0] = powf(2.0f, src->f[0]); + dst->f[1] = powf(2.0f, src->f[1]); + dst->f[2] = powf(2.0f, src->f[2]); + dst->f[3] = powf(2.0f, src->f[3]); +#endif /* FAST_MATH */ +} + +static void +micro_flr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0]); + dst->f[1] = floorf(src->f[1]); + dst->f[2] = floorf(src->f[2]); + dst->f[3] = floorf(src->f[3]); +} + +static void +micro_frc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] - floorf(src->f[0]); + dst->f[1] = src->f[1] - floorf(src->f[1]); + dst->f[2] = src->f[2] - floorf(src->f[2]); + dst->f[3] = src->f[3] - floorf(src->f[3]); +} + +static void +micro_iabs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; + dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; + dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; + dst->i[3] = src->i[3] >= 0 ? 
src->i[3] : -src->i[3]; +} + +static void +micro_ineg(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_lg2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_log2(src->f[0]); + dst->f[1] = util_fast_log2(src->f[1]); + dst->f[2] = util_fast_log2(src->f[2]); + dst->f[3] = util_fast_log2(src->f[3]); +#else + dst->f[0] = logf(src->f[0]) * 1.442695f; + dst->f[1] = logf(src->f[1]) * 1.442695f; + dst->f[2] = logf(src->f[2]) * 1.442695f; + dst->f[3] = logf(src->f[3]) * 1.442695f; +#endif +} + +static void +micro_lrp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0]; + dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1]; + dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2]; + dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3]; +} + +static void +micro_mad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0]; + dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1]; + dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2]; + dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3]; +} + +static void +micro_mov(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src->u[0]; + dst->u[1] = src->u[1]; + dst->u[2] = src->u[2]; + dst->u[3] = src->u[3]; +} + +static void +micro_rcp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / src->f[0]; + dst->f[1] = 1.0f / src->f[1]; + dst->f[2] = 1.0f / src->f[2]; + dst->f[3] = 1.0f / src->f[3]; +} + +static void +micro_rnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0] + 0.5f); + dst->f[1] = floorf(src->f[1] + 
0.5f); + dst->f[2] = floorf(src->f[2] + 0.5f); + dst->f[3] = floorf(src->f[3] + 0.5f); +} + +static void +micro_rsq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); + dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); + dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); + dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); +} + +static void +micro_seq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sgn(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; + dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; + dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; + dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; +} + +static void +micro_sgt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] > src[1].f[3] ? 
1.0f : 0.0f; +} + +static void +micro_sin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = sinf(src->f[0]); + dst->f[1] = sinf(src->f[1]); + dst->f[2] = sinf(src->f[2]); + dst->f[3] = sinf(src->f[3]); +} + +static void +micro_sle(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_slt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] != src[1].f[3] ? 
1.0f : 0.0f; +} + +static void +micro_trunc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)(int)src->f[0]; + dst->f[1] = (float)(int)src->f[1]; + dst->f[2] = (float)(int)src->f[2]; + dst->f[3] = (float)(int)src->f[3]; +} + + #define CHAN_X 0 #define CHAN_Y 1 #define CHAN_Z 2 #define CHAN_W 3 +enum tgsi_exec_datatype { + TGSI_EXEC_DATA_FLOAT, + TGSI_EXEC_DATA_INT, + TGSI_EXEC_DATA_UINT +}; + /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ @@ -123,23 +443,19 @@ /** The execution mask depends on the conditional mask and the loop mask */ #define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; -#ifdef DEBUG -static void -check_inf_or_nan(const union tgsi_exec_channel *chan) -{ - assert(!util_is_inf_or_nan(chan->f[0])); - assert(!util_is_inf_or_nan(chan->f[1])); - assert(!util_is_inf_or_nan(chan->f[2])); - assert(!util_is_inf_or_nan(chan->f[3])); -} -#endif +#define CHECK_INF_OR_NAN(chan) do {\ + assert(!util_is_inf_or_nan((chan)->f[0]));\ + assert(!util_is_inf_or_nan((chan)->f[1]));\ + assert(!util_is_inf_or_nan((chan)->f[2]));\ + assert(!util_is_inf_or_nan((chan)->f[3]));\ + } while (0) #ifdef DEBUG @@ -292,6 +608,14 @@ tgsi_exec_machine_bind_shader( * sizeof(struct tgsi_full_declaration)); maxDeclarations += 10; } + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { + unsigned reg; + for (reg = parse.FullToken.FullDeclaration.Range.First; + reg <= parse.FullToken.FullDeclaration.Range.Last; + ++reg) { + ++mach->NumOutputs; + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -372,6 +696,7 @@ tgsi_exec_machine_create( void ) memset(mach, 0, sizeof(*mach)); mach->Addrs = 
&mach->Temps[TGSI_EXEC_TEMP_ADDR]; + mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; /* Setup constants. */ @@ -413,18 +738,6 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) align_free(mach); } - -static void -micro_abs( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = fabsf( src->f[0] ); - dst->f[1] = fabsf( src->f[1] ); - dst->f[2] = fabsf( src->f[2] ); - dst->f[3] = fabsf( src->f[3] ); -} - static void micro_add( union tgsi_exec_channel *dst, @@ -437,76 +750,6 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } -#if 0 -static void -micro_iadd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] + src1->i[0]; - dst->i[1] = src0->i[1] + src1->i[1]; - dst->i[2] = src0->i[2] + src1->i[2]; - dst->i[3] = src0->i[3] + src1->i[3]; -} -#endif - -static void -micro_and( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] & src1->u[0]; - dst->u[1] = src0->u[1] & src1->u[1]; - dst->u[2] = src0->u[2] & src1->u[2]; - dst->u[3] = src0->u[3] & src1->u[3]; -} - -static void -micro_ceil( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = ceilf( src->f[0] ); - dst->f[1] = ceilf( src->f[1] ); - dst->f[2] = ceilf( src->f[2] ); - dst->f[3] = ceilf( src->f[3] ); -} - -static void -micro_cos( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = cosf( src->f[0] ); - dst->f[1] = cosf( src->f[1] ); - dst->f[2] = cosf( src->f[2] ); - dst->f[3] = cosf( src->f[3] ); -} - -static void -micro_ddx( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_ddy( - union tgsi_exec_channel *dst, 
- const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; -} - static void micro_div( union tgsi_exec_channel *dst, @@ -527,99 +770,6 @@ micro_div( } } -#if 0 -static void -micro_udiv( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] / src1->u[0]; - dst->u[1] = src0->u[1] / src1->u[1]; - dst->u[2] = src0->u[2] / src1->u[2]; - dst->u[3] = src0->u[3] / src1->u[3]; -} -#endif - -static void -micro_eq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; -} - -#if 0 -static void -micro_ieq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -static void -micro_exp2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ -#if FAST_MATH - dst->f[0] = util_fast_exp2( src->f[0] ); - dst->f[1] = util_fast_exp2( src->f[1] ); - dst->f[2] = util_fast_exp2( src->f[2] ); - dst->f[3] = util_fast_exp2( src->f[3] ); -#else - -#if DEBUG - /* Inf is okay for this instruction, so clamp it to silence assertions. 
*/ - uint i; - union tgsi_exec_channel clamped; - - for (i = 0; i < 4; i++) { - if (src->f[i] > 127.99999f) { - clamped.f[i] = 127.99999f; - } else if (src->f[i] < -126.99999f) { - clamped.f[i] = -126.99999f; - } else { - clamped.f[i] = src->f[i]; - } - } - src = &clamped; -#endif - - dst->f[0] = powf( 2.0f, src->f[0] ); - dst->f[1] = powf( 2.0f, src->f[1] ); - dst->f[2] = powf( 2.0f, src->f[2] ); - dst->f[3] = powf( 2.0f, src->f[3] ); -#endif -} - -#if 0 -static void -micro_f2ut( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = (uint) src->f[0]; - dst->u[1] = (uint) src->f[1]; - dst->u[2] = (uint) src->f[2]; - dst->u[3] = (uint) src->f[3]; -} -#endif - static void micro_float_clamp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -647,71 +797,6 @@ micro_float_clamp(union tgsi_exec_channel *dst, } static void -micro_flr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] ); - dst->f[1] = floorf( src->f[1] ); - dst->f[2] = floorf( src->f[2] ); - dst->f[3] = floorf( src->f[3] ); -} - -static void -micro_frc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] - floorf( src->f[0] ); - dst->f[1] = src->f[1] - floorf( src->f[1] ); - dst->f[2] = src->f[2] - floorf( src->f[2] ); - dst->f[3] = src->f[3] - floorf( src->f[3] ); -} - -static void -micro_i2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->i[0]; - dst->f[1] = (float) src->i[1]; - dst->f[2] = (float) src->i[2]; - dst->f[3] = (float) src->i[3]; -} - -static void -micro_lg2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ -#if FAST_MATH - dst->f[0] = util_fast_log2( src->f[0] ); - dst->f[1] = util_fast_log2( src->f[1] ); - dst->f[2] = util_fast_log2( src->f[2] ); - dst->f[3] = util_fast_log2( src->f[3] ); -#else - dst->f[0] = logf( src->f[0] ) * 1.442695f; - dst->f[1] = logf( 
src->f[1] ) * 1.442695f; - dst->f[2] = logf( src->f[2] ) * 1.442695f; - dst->f[3] = logf( src->f[3] ) * 1.442695f; -#endif -} - -static void -micro_le( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void micro_lt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, @@ -725,38 +810,6 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } -#if 0 -static void -micro_ilt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -#if 0 -static void -micro_ult( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; -} -#endif - static void micro_max( union tgsi_exec_channel *dst, @@ -769,34 +822,6 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? 
src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - static void micro_min( union tgsi_exec_channel *dst, @@ -809,48 +834,6 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? 
src0->u[3] : src1->u[3]; -} -#endif - -#if 0 -static void -micro_umod( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] % src1->u[0]; - dst->u[1] = src0->u[1] % src1->u[1]; - dst->u[2] = src0->u[2] % src1->u[2]; - dst->u[3] = src0->u[3] % src1->u[3]; -} -#endif - static void micro_mul( union tgsi_exec_channel *dst, @@ -865,20 +848,6 @@ micro_mul( #if 0 static void -micro_imul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] * src1->i[0]; - dst->i[1] = src0->i[1] * src1->i[1]; - dst->i[2] = src0->i[2] * src1->i[2]; - dst->i[3] = src0->i[3] * src1->i[3]; -} -#endif - -#if 0 -static void micro_imul64( union tgsi_exec_channel *dst0, union tgsi_exec_channel *dst1, @@ -942,42 +911,6 @@ micro_neg( dst->f[3] = -src->f[3]; } -#if 0 -static void -micro_ineg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} -#endif - -static void -micro_not( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = ~src->u[0]; - dst->u[1] = ~src->u[1]; - dst->u[2] = ~src->u[2]; - dst->u[3] = ~src->u[3]; -} - -static void -micro_or( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] | src1->u[0]; - dst->u[1] = src0->u[1] | src1->u[1]; - dst->u[2] = src0->u[2] | src1->u[2]; - dst->u[3] = src0->u[3] | src1->u[3]; -} - static void micro_pow( union tgsi_exec_channel *dst, @@ -998,88 +931,6 @@ micro_pow( } static void -micro_rnd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] + 0.5f ); - dst->f[1] = floorf( src->f[1] + 0.5f ); - dst->f[2] = floorf( src->f[2] + 0.5f ); - dst->f[3] = floorf( src->f[3] + 0.5f ); -} - 
-static void -micro_sgn( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; - dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; - dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; - dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; -} - -static void -micro_shl( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] << src1->i[0]; - dst->i[1] = src0->i[1] << src1->i[1]; - dst->i[2] = src0->i[2] << src1->i[2]; - dst->i[3] = src0->i[3] << src1->i[3]; -} - -static void -micro_ishr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] >> src1->i[0]; - dst->i[1] = src0->i[1] >> src1->i[1]; - dst->i[2] = src0->i[2] >> src1->i[2]; - dst->i[3] = src0->i[3] >> src1->i[3]; -} - -static void -micro_trunc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0 ) -{ - dst->f[0] = (float) (int) src0->f[0]; - dst->f[1] = (float) (int) src0->f[1]; - dst->f[2] = (float) (int) src0->f[2]; - dst->f[3] = (float) (int) src0->f[3]; -} - -#if 0 -static void -micro_ushr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] >> src1->u[0]; - dst->u[1] = src0->u[1] >> src1->u[1]; - dst->u[2] = src0->u[2] >> src1->u[2]; - dst->u[3] = src0->u[3] >> src1->u[3]; -} -#endif - -static void -micro_sin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sinf( src->f[0] ); - dst->f[1] = sinf( src->f[1] ); - dst->f[2] = sinf( src->f[2] ); - dst->f[3] = sinf( src->f[3] ); -} - -static void micro_sqrt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { @@ -1101,31 +952,6 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } -#if 
0 -static void -micro_u2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->u[0]; - dst->f[1] = (float) src->u[1]; - dst->f[2] = (float) src->u[2]; - dst->f[3] = (float) src->u[3]; -} -#endif - -static void -micro_xor( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] ^ src1->u[0]; - dst->u[1] = src0->u[1] ^ src1->u[1]; - dst->u[2] = src0->u[2] ^ src1->u[2]; - dst->u[3] = src0->u[3] ^ src1->u[3]; -} - static void fetch_src_file_channel( const struct tgsi_exec_machine *mach, @@ -1224,11 +1050,11 @@ fetch_src_file_channel( } static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) +fetch_source(const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index, + enum tgsi_exec_datatype src_datatype) { union tgsi_exec_channel index; uint swizzle; @@ -1277,10 +1103,10 @@ fetch_source( &indir_index ); /* add value of address register to the offset */ - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. 
@@ -1357,10 +1183,10 @@ fetch_source( &index2, &indir_index ); - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1385,32 +1211,30 @@ fetch_source( &index, chan ); - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); - break; - - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); - break; + if (reg->Register.Absolute) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_abs(chan, chan); + } else { + micro_iabs(chan, chan); + } + } - case TGSI_UTIL_SIGN_KEEP: - break; + if (reg->Register.Negate) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_neg(chan, chan); + } else { + micro_ineg(chan, chan); + } } } static void -store_dest( - struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) +store_dest(struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index, + enum tgsi_exec_datatype dst_datatype) { uint i; union tgsi_exec_channel null; @@ -1419,9 +1243,9 @@ store_dest( int offset = 0; /* indirection offset */ int index; -#ifdef DEBUG - check_inf_or_nan(chan); -#endif + if (dst_datatype == TGSI_EXEC_DATA_FLOAT) { + CHECK_INF_OR_NAN(chan); + } /* There is an extra source register that indirectly subscripts * a register file. 
The direct index now becomes an offset @@ -1456,7 +1280,7 @@ store_dest( &indir_index ); /* save indirection offset */ - offset = (int) indir_index.f[0]; + offset = indir_index.i[0]; } switch (reg->Register.File) { @@ -1468,6 +1292,15 @@ store_dest( index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + reg->Register.Index; dst = &mach->Outputs[offset + index].xyzw[chan_index]; +#if 0 + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) + fprintf(stderr, "%f, ", chan->f[i]); + fprintf(stderr, ")\n"); + } +#endif break; case TGSI_FILE_TEMPORARY: @@ -1577,10 +1410,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) + store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) /** @@ -1638,6 +1471,35 @@ exec_kilp(struct tgsi_exec_machine *mach, mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +static void +emit_vertex(struct tgsi_exec_machine *mach) +{ + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + } +} + +static void +emit_primitive(struct tgsi_exec_machine *mach) +{ + unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + ++(*prim_count); + debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); + 
mach->Primitives[*prim_count] = 0; + } +} /* * Fetch a four texture samples using STR texture coordinates. @@ -1908,7 +1770,7 @@ exec_declaration(struct tgsi_exec_machine *mach, if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { assert(decl->Semantic.Index == 0); assert(first == last); - assert(mask = TGSI_WRITEMASK_XYZW); + assert(mask == TGSI_WRITEMASK_XYZW); mach->Inputs[first] = mach->QuadPos; } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { @@ -1954,6 +1816,461 @@ exec_declaration(struct tgsi_exec_machine *mach, } } +typedef void (* micro_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src); + +static void +exec_scalar_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + union tgsi_exec_channel src; + union tgsi_exec_channel dst; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); + op(&dst, &src); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src; + + fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); + op(&dst.xyzw[chan], &src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_binary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + 
micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[2]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_trinary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[3]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_break(struct tgsi_exec_machine *mach) +{ + if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + } else { + assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); + + mach->Switch.mask = 0x0; + + UPDATE_EXEC_MASK(mach); + } +} + +static void +exec_switch(struct tgsi_exec_machine *mach, + const struct 
tgsi_full_instruction *inst) +{ + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + mach->Switch.mask = 0x0; + mach->Switch.defaultMask = 0x0; + + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_case(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + union tgsi_exec_channel src; + uint mask = 0; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + + if (mach->Switch.selector.u[0] == src.u[0]) { + mask |= 0x1; + } + if (mach->Switch.selector.u[1] == src.u[1]) { + mask |= 0x2; + } + if (mach->Switch.selector.u[2] == src.u[2]) { + mask |= 0x4; + } + if (mach->Switch.selector.u[3] == src.u[3]) { + mask |= 0x8; + } + + mach->Switch.defaultMask |= mask; + + mach->Switch.mask |= mask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_default(struct tgsi_exec_machine *mach) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + + mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_endswitch(struct tgsi_exec_machine *mach) +{ + mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; + + UPDATE_EXEC_MASK(mach); +} + +static void +micro_i2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->i[0]; + dst->f[1] = (float)src->i[1]; + dst->f[2] = (float)src->i[2]; + dst->f[3] = (float)src->i[3]; +} + +static void +micro_not(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = 
~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_shl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] << src[1].u[0]; + dst->u[1] = src[0].u[1] << src[1].u[1]; + dst->u[2] = src[0].u[2] << src[1].u[2]; + dst->u[3] = src[0].u[3] << src[1].u[3]; +} + +static void +micro_and(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] & src[1].u[0]; + dst->u[1] = src[0].u[1] & src[1].u[1]; + dst->u[2] = src[0].u[2] & src[1].u[2]; + dst->u[3] = src[0].u[3] & src[1].u[3]; +} + +static void +micro_or(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] | src[1].u[0]; + dst->u[1] = src[0].u[1] | src[1].u[1]; + dst->u[2] = src[0].u[2] | src[1].u[2]; + dst->u[3] = src[0].u[3] | src[1].u[3]; +} + +static void +micro_xor(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] ^ src[1].u[0]; + dst->u[1] = src[0].u[1] ^ src[1].u[1]; + dst->u[2] = src[0].u[2] ^ src[1].u[2]; + dst->u[3] = src[0].u[3] ^ src[1].u[3]; +} + +static void +micro_f2i(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)src->f[0]; + dst->i[1] = (int)src->f[1]; + dst->i[2] = (int)src->f[2]; + dst->i[3] = (int)src->f[3]; +} + +static void +micro_idiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] / src[1].i[0]; + dst->i[1] = src[0].i[1] / src[1].i[1]; + dst->i[2] = src[0].i[2] / src[1].i[2]; + dst->i[3] = src[0].i[3] / src[1].i[3]; +} + +static void +micro_imax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] > src[1].i[3] ? 
src[0].i[3] : src[1].i[3]; +} + +static void +micro_imin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_isge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0; +} + +static void +micro_ishr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >> src[1].i[0]; + dst->i[1] = src[0].i[1] >> src[1].i[1]; + dst->i[2] = src[0].i[2] >> src[1].i[2]; + dst->i[3] = src[0].i[3] >> src[1].i[3]; +} + +static void +micro_islt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] < src[1].i[3] ? 
-1 : 0; +} + +static void +micro_f2u(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = (uint)src->f[0]; + dst->u[1] = (uint)src->f[1]; + dst->u[2] = (uint)src->f[2]; + dst->u[3] = (uint)src->f[3]; +} + +static void +micro_u2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->u[0]; + dst->f[1] = (float)src->u[1]; + dst->f[2] = (float)src->u[2]; + dst->f[3] = (float)src->u[3]; +} + +static void +micro_uadd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] + src[1].u[0]; + dst->u[1] = src[0].u[1] + src[1].u[1]; + dst->u[2] = src[0].u[2] + src[1].u[2]; + dst->u[3] = src[0].u[3] + src[1].u[3]; +} + +static void +micro_udiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] / src[1].u[0]; + dst->u[1] = src[0].u[1] / src[1].u[1]; + dst->u[2] = src[0].u[2] / src[1].u[2]; + dst->u[3] = src[0].u[3] / src[1].u[3]; +} + +static void +micro_umad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3]; +} + +static void +micro_umax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] < src[1].u[2] ? 
src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] % src[1].u[0]; + dst->u[1] = src[0].u[1] % src[1].u[1]; + dst->u[2] = src[0].u[2] % src[1].u[2]; + dst->u[3] = src[0].u[3] % src[1].u[3]; +} + +static void +micro_umul(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3]; +} + +static void +micro_useq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0; +} + +static void +micro_usge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0; +} + +static void +micro_ushr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >> src[1].u[0]; + dst->u[1] = src[0].u[1] >> src[1].u[1]; + dst->u[2] = src[0].u[2] >> src[1].u[2]; + dst->u[3] = src[0].u[3] >> src[1].u[3]; +} + +static void +micro_uslt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0; +} + +static void +micro_usne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] != src[1].u[0] ? 
~0 : 0; + dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0; +} + static void exec_instruction( struct tgsi_exec_machine *mach, @@ -1968,23 +2285,11 @@ exec_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - case TGSI_OPCODE_FLR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MOV: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&d[chan_index], 0, chan_index); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LIT: @@ -2021,23 +2326,11 @@ exec_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[0], &r[0] ); - micro_sqrt( &r[0], &r[0] ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EXP: @@ -2208,41 +2501,15 @@ exec_instruction( break; case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - 
micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1] ); - FETCH( &r[1], 2, chan_index ); - micro_add(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SUB: @@ -2257,17 +2524,7 @@ exec_instruction( break; case TGSI_OPCODE_LRP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); - micro_mul( &r[0], &r[0], &r[1] ); - micro_add(&d[chan_index], &r[0], &r[2]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CND: @@ -2301,13 +2558,7 @@ exec_instruction( break; case TGSI_OPCODE_FRC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_frc(&d[chan_index], 
&r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CLAMP: @@ -2323,33 +2574,20 @@ exec_instruction( } break; + case TGSI_OPCODE_FLR: + exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_ARR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_rnd(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EX2: - FETCH(&r[0], 0, CHAN_X); - - micro_exp2( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LG2: - FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_POW: @@ -2402,15 +2640,9 @@ exec_instruction( } break; - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - micro_abs(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; + case TGSI_OPCODE_ABS: + exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; case TGSI_OPCODE_RCC: FETCH(&r[0], 0, CHAN_X); @@ -2449,33 +2681,15 @@ exec_instruction( break; case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - micro_cos( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + 
exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddx(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddy(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_KILP: @@ -2552,14 +2766,7 @@ exec_instruction( break; case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SFL: @@ -2569,44 +2776,19 @@ exec_instruction( break; case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - micro_sin( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, 
micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_STR: @@ -2711,6 +2893,10 @@ exec_instruction( assert (0); break; + case TGSI_OPCODE_ARR: + exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_BRA: assert (0); break; @@ -2730,6 +2916,8 @@ exec_instruction( mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; + mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; /* note that PC was already incremented above */ mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; @@ -2737,12 +2925,17 @@ exec_instruction( /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - 
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ @@ -2775,6 +2968,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -2785,14 +2984,7 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_sgn(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CMP: @@ -2976,87 +3168,31 @@ exec_instruction( break; case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ceil(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + 
exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_i2f(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_not(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_trunc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_shl(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ishr(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_and(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - 
STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_or(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_MOD: @@ -3064,14 +3200,7 @@ exec_instruction( break; case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_xor(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_SAD: @@ -3087,13 +3216,11 @@ exec_instruction( break; case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + emit_vertex(mach); break; case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + emit_primitive(mach); break; case TGSI_OPCODE_BGNFOR: @@ -3122,11 +3249,15 @@ exec_instruction( case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = 
mach->ContMask; mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; case TGSI_OPCODE_ENDFOR: @@ -3173,6 +3304,8 @@ exec_instruction( --mach->LoopLabelStackTop; assert(mach->LoopCounterStackTop > 0); --mach->LoopCounterStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; @@ -3196,15 +3329,14 @@ exec_instruction( mach->ContMask = mach->ContStack[--mach->ContStackTop]; assert(mach->LoopLabelStackTop > 0); --mach->LoopLabelStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); + exec_break(mach); break; case TGSI_OPCODE_CONT: @@ -3235,6 +3367,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -3265,11 +3403,116 @@ exec_instruction( UPDATE_EXEC_MASK(mach); break; + case TGSI_OPCODE_F2I: + exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_IDIV: + exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMAX: + exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMIN: + exec_vector_binary(mach, inst, micro_imin, 
TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_INEG: + exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISGE: + exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISHR: + exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISLT: + exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_F2U: + exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_U2F: + exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UADD: + exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UDIV: + exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAD: + exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAX: + exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMIN: + exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMOD: + exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMUL: + exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USEQ: + exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USGE: + exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USHR: + exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USLT: + exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USNE: + exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_SWITCH: + exec_switch(mach, inst); + break; + + case TGSI_OPCODE_CASE: + exec_case(mach, inst); + break; + + case TGSI_OPCODE_DEFAULT: + exec_default(mach); + break; + + case TGSI_OPCODE_ENDSWITCH: + exec_endswitch(mach); + break; + default: assert( 0 ); } } + #define DEBUG_EXECUTION 0 @@ -3289,9 +3532,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; + mach->Switch.mask = 0xf; + assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; @@ -3348,11 +3595,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("(%6f, %6f, %6f, %6f)\n", - temps[i].xyzw[0].f[j], - temps[i].xyzw[1].f[j], - temps[i].xyzw[2].f[j], - temps[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], + temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], + temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], + temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); } } } @@ -3366,11 +3613,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("{%6f, %6f, %6f, %6f}\n", - outputs[i].xyzw[0].f[j], - outputs[i].xyzw[1].f[j], - outputs[i].xyzw[2].f[j], - outputs[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + 
outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } } } @@ -3392,6 +3639,8 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fd94c1bc440..aa3a98d7f18 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -179,6 +179,7 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_COND_NESTING 32 #define TGSI_EXEC_MAX_LOOP_NESTING 32 +#define TGSI_EXEC_MAX_SWITCH_NESTING 32 #define TGSI_EXEC_MAX_CALL_NESTING 32 /* The maximum number of input attributes per vertex. For 2D @@ -191,6 +192,14 @@ struct tgsi_exec_labels */ #define TGSI_EXEC_MAX_CONST_BUFFER 4096 +/* The maximum number of vertices per primitive */ +#define TGSI_MAX_PRIM_VERTICES 6 + +/* The maximum number of primitives to be generated */ +#define TGSI_MAX_PRIMITIVES 64 + +/* The maximum total number of vertices */ +#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS) /** function call/activation record */ struct tgsi_call_record @@ -198,10 +207,29 @@ struct tgsi_call_record uint CondStackTop; uint LoopStackTop; uint ContStackTop; + int SwitchStackTop; + int BreakStackTop; uint ReturnAddr; }; +/* Switch-case block state. 
*/ +struct tgsi_switch_record { + uint mask; /**< execution mask */ + union tgsi_exec_channel selector; /**< a value case statements are compared to */ + uint defaultMask; /**< non-execute mask for default case */ +}; + + +enum tgsi_break_type { + TGSI_EXEC_BREAK_INSIDE_LOOP, + TGSI_EXEC_BREAK_INSIDE_SWITCH +}; + + +#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING) + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -214,8 +242,8 @@ struct tgsi_exec_machine float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; - struct tgsi_exec_vector Inputs[PIPE_MAX_ATTRIBS]; - struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS]; + struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; + struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; struct tgsi_exec_vector *Addrs; struct tgsi_exec_vector *Predicates; @@ -229,6 +257,8 @@ struct tgsi_exec_machine /* GEOMETRY processor only. */ unsigned *Primitives; + unsigned NumOutputs; + unsigned MaxGeometryShaderOutputs; /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; @@ -242,6 +272,12 @@ struct tgsi_exec_machine uint FuncMask; /**< For function calls */ uint ExecMask; /**< = CondMask & LoopMask */ + /* Current switch-case state. */ + struct tgsi_switch_record Switch; + + /* Current break type. 
*/ + enum tgsi_break_type BreakType; + /** Condition mask stack (for nested conditionals) */ uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; int CondStackTop; @@ -262,6 +298,13 @@ struct tgsi_exec_machine uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; + /** Switch case stack */ + struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING]; + int SwitchStackTop; + + enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK]; + int BreakStackTop; + /** Function execution mask stack (for executing subroutine code) */ uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; int FuncStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index be375cabb8b..de0e09cdbae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -119,7 +119,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 0, 0, 0, 0, 0, 0, "", 88 }, /* removed */ { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, @@ -149,7 +149,33 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */ + { 0, 0, 0, 0, 0, 0, "", 118 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "F2I", TGSI_OPCODE_F2I }, + { 1, 2, 0, 0, 0, 0, "IDIV", TGSI_OPCODE_IDIV }, + { 1, 2, 0, 0, 0, 0, "IMAX", TGSI_OPCODE_IMAX }, + { 1, 2, 0, 0, 0, 0, "IMIN", TGSI_OPCODE_IMIN }, + { 1, 1, 0, 0, 0, 0, "INEG", TGSI_OPCODE_INEG }, + { 1, 2, 0, 0, 0, 0, "ISGE", TGSI_OPCODE_ISGE }, + { 1, 2, 0, 0, 0, 0, "ISHR", TGSI_OPCODE_ISHR }, + { 1, 2, 0, 0, 0, 0, "ISLT", 
TGSI_OPCODE_ISLT }, + { 1, 1, 0, 0, 0, 0, "F2U", TGSI_OPCODE_F2U }, + { 1, 1, 0, 0, 0, 0, "U2F", TGSI_OPCODE_U2F }, + { 1, 2, 0, 0, 0, 0, "UADD", TGSI_OPCODE_UADD }, + { 1, 2, 0, 0, 0, 0, "UDIV", TGSI_OPCODE_UDIV }, + { 1, 3, 0, 0, 0, 0, "UMAD", TGSI_OPCODE_UMAD }, + { 1, 2, 0, 0, 0, 0, "UMAX", TGSI_OPCODE_UMAX }, + { 1, 2, 0, 0, 0, 0, "UMIN", TGSI_OPCODE_UMIN }, + { 1, 2, 0, 0, 0, 0, "UMOD", TGSI_OPCODE_UMOD }, + { 1, 2, 0, 0, 0, 0, "UMUL", TGSI_OPCODE_UMUL }, + { 1, 2, 0, 0, 0, 0, "USEQ", TGSI_OPCODE_USEQ }, + { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE }, + { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR }, + { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT }, + { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE }, + { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH }, + { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE }, + { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT }, + { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b34263da489..e4af15c156f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -124,7 +124,6 @@ OP11(I2F) OP11(NOT) OP11(TRUNC) OP12(SHL) -OP12(SHR) OP12(AND) OP12(OR) OP12(MOD) @@ -146,6 +145,28 @@ OP01(IFC) OP01(BREAKC) OP01(KIL) OP00(END) +OP11(F2I) +OP12(IDIV) +OP12(IMAX) +OP12(IMIN) +OP11(INEG) +OP12(ISGE) +OP12(ISHR) +OP12(ISLT) +OP11(F2U) +OP11(U2F) +OP12(UADD) +OP12(UDIV) +OP13(UMAD) +OP12(UMAX) +OP12(UMIN) +OP12(UMOD) +OP12(UMUL) +OP12(USEQ) +OP12(USGE) +OP12(USHR) +OP12(USLT) +OP12(USNE) #undef OP00 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index fa65ecb9975..8c7062d850c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -119,17 +119,29 @@ tgsi_parse_token( case TGSI_TOKEN_TYPE_IMMEDIATE: { struct tgsi_full_immediate *imm = 
&ctx->FullToken.FullImmediate; + uint imm_count; memset(imm, 0, sizeof *imm); copy_token(&imm->Immediate, &token); + imm_count = imm->Immediate.NrTokens - 1; + switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: - { - uint imm_count = imm->Immediate.NrTokens - 1; - for (i = 0; i < imm_count; i++) { - next_token(ctx, &imm->u[i]); - } + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Float); + } + break; + + case TGSI_IMM_UINT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Uint); + } + break; + + case TGSI_IMM_INT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Int); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index c27579e7942..9b0644465af 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -26,32 +26,112 @@ **************************************************************************/ #include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_prim.h" +#include "cso_cache/cso_hash.h" #include "tgsi_sanity.h" #include "tgsi_info.h" #include "tgsi_iterate.h" -typedef uint reg_flag; - -#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) - -#define MAX_REGISTERS 1024 -#define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG) +typedef struct { + uint file : 28; + /* max 2 dimensions */ + uint dimensions : 4; + uint indices[2]; +} scan_register; struct sanity_check_ctx { struct tgsi_iterate_context iter; + struct cso_hash *regs_decl; + struct cso_hash *regs_used; + struct cso_hash *regs_ind_used; - reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REG_FLAGS]; - reg_flag regs_used[TGSI_FILE_COUNT][MAX_REG_FLAGS]; - boolean regs_ind_used[TGSI_FILE_COUNT]; uint num_imms; uint num_instructions; uint index_of_END; uint errors; uint warnings; + uint implied_array_size; }; +static INLINE unsigned +scan_register_key(const scan_register *reg) +{ + unsigned key = reg->file; + key |= 
(reg->indices[0] << 4); + key |= (reg->indices[1] << 18); + + return key; +} + +static void +fill_scan_register1d(scan_register *reg, + uint file, uint index) +{ + reg->file = file; + reg->dimensions = 1; + reg->indices[0] = index; + reg->indices[1] = 0; +} + +static void +fill_scan_register2d(scan_register *reg, + uint file, uint index1, uint index2) +{ + reg->file = file; + reg->dimensions = 2; + reg->indices[0] = index1; + reg->indices[1] = index2; +} + +static void +scan_register_dst(scan_register *reg, + struct tgsi_full_dst_register *dst) +{ + fill_scan_register1d(reg, + dst->Register.File, + dst->Register.Index); +} + +static void +scan_register_src(scan_register *reg, + struct tgsi_full_src_register *src) +{ + if (src->Register.Dimension) { + /*FIXME: right now we don't support indirect + * multidimensional addressing */ + debug_assert(!src->Dimension.Indirect); + fill_scan_register2d(reg, + src->Register.File, + src->Register.Index, + src->Dimension.Index); + } else { + fill_scan_register1d(reg, + src->Register.File, + src->Register.Index); + } +} + +static scan_register * +create_scan_register_src(struct tgsi_full_src_register *src) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_src(reg, src); + + return reg; +} + +static scan_register * +create_scan_register_dst(struct tgsi_full_dst_register *dst) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_dst(reg, dst); + + return reg; +} + static void report_error( struct sanity_check_ctx *ctx, @@ -99,12 +179,12 @@ check_file_name( static boolean is_register_declared( struct sanity_check_ctx *ctx, - uint file, - int index ) + const scan_register *reg) { - assert( index >= 0 && index < MAX_REGISTERS ); - - return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; + void *data = cso_hash_find_data_from_template( + ctx->regs_decl, scan_register_key(reg), + (void*)reg, sizeof(scan_register)); + return data ? 
TRUE : FALSE; } static boolean @@ -112,23 +192,37 @@ is_any_register_declared( struct sanity_check_ctx *ctx, uint file ) { - uint i; + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); - for (i = 0; i < MAX_REG_FLAGS; i++) - if (ctx->regs_decl[file][i]) + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (reg->file == file) return TRUE; + iter = cso_hash_iter_next(iter); + } + return FALSE; } static boolean is_register_used( struct sanity_check_ctx *ctx, - uint file, - int index ) + scan_register *reg) { - assert( index < MAX_REGISTERS ); + void *data = cso_hash_find_data_from_template( + ctx->regs_used, scan_register_key(reg), + reg, sizeof(scan_register)); + return data ? TRUE : FALSE; +} - return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; + +static boolean +is_ind_register_used( + struct sanity_check_ctx *ctx, + scan_register *reg) +{ + return cso_hash_contains(ctx->regs_ind_used, reg->file); } static const char *file_names[TGSI_FILE_COUNT] = @@ -148,31 +242,40 @@ static const char *file_names[TGSI_FILE_COUNT] = static boolean check_register_usage( struct sanity_check_ctx *ctx, - uint file, - int index, + scan_register *reg, const char *name, boolean indirect_access ) { - if (!check_file_name( ctx, file )) + if (!check_file_name( ctx, reg->file )) { + FREE(reg); return FALSE; + } if (indirect_access) { /* Note that 'index' is an offset relative to the value of the - * address register. No range checking done here. - */ - if (!is_any_register_declared( ctx, file )) - report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); - ctx->regs_ind_used[file] = TRUE; + * address register. 
No range checking done here.*/ + reg->indices[0] = 0; + reg->indices[1] = 0; + if (!is_any_register_declared( ctx, reg->file )) + report_error( ctx, "%s: Undeclared %s register", file_names[reg->file], name ); + if (!is_ind_register_used(ctx, reg)) + cso_hash_insert(ctx->regs_ind_used, reg->file, reg); + else + FREE(reg); } else { - if (index < 0 || index >= MAX_REGISTERS) { - report_error( ctx, "%s[%d]: Invalid %s index", file_names[file], index, name ); - return FALSE; - } - - if (!is_register_declared( ctx, file, index )) - report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); - ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + if (!is_register_declared( ctx, reg )) { + if (reg->dimensions == 2) + report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], reg->indices[1], name ); + else + report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], name ); + } + if (!is_register_used( ctx, reg )) + cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg); + else + FREE(reg); } return TRUE; } @@ -210,33 +313,33 @@ iter_instruction( * Mark the registers as used. 
*/ for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + scan_register *reg = create_scan_register_dst(&inst->Dst[i]); check_register_usage( ctx, - inst->Dst[i].Register.File, - inst->Dst[i].Register.Index, + reg, "destination", FALSE ); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + scan_register *reg = create_scan_register_src(&inst->Src[i]); check_register_usage( ctx, - inst->Src[i].Register.File, - inst->Src[i].Register.Index, + reg, "source", (boolean)inst->Src[i].Register.Indirect ); if (inst->Src[i].Register.Indirect) { - uint file; - int index; + scan_register *ind_reg = MALLOC(sizeof(scan_register)); - file = inst->Src[i].Indirect.File; - index = inst->Src[i].Indirect.Index; + fill_scan_register1d(ind_reg, + inst->Src[i].Indirect.File, + inst->Src[i].Indirect.Index); check_register_usage( ctx, - file, - index, + reg, "indirect", FALSE ); - if (!(file == TGSI_FILE_ADDRESS || file == TGSI_FILE_LOOP) || index != 0) { + if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || + reg->indices[0] != 0) { report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); } } @@ -266,6 +369,19 @@ iter_instruction( return TRUE; } +static void +check_and_declare(struct sanity_check_ctx *ctx, + scan_register *reg) +{ + if (is_register_declared( ctx, reg)) + report_error( ctx, "%s[%u]: The same register declared more than once", + file_names[reg->file], reg->indices[0] ); + cso_hash_insert(ctx->regs_decl, + scan_register_key(reg), + reg); +} + + static boolean iter_declaration( struct tgsi_iterate_context *iter, @@ -287,9 +403,21 @@ iter_declaration( if (!check_file_name( ctx, file )) return TRUE; for (i = decl->Range.First; i <= decl->Range.Last; i++) { - if (is_register_declared( ctx, file, i )) - report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i ); - ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); + /* declared TGSI_FILE_INPUT's for geometry processor + * have an 
implied second dimension */ + if (file == TGSI_FILE_INPUT && + ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) { + uint vert; + for (vert = 0; vert < ctx->implied_array_size; ++vert) { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register2d(reg, file, vert, i); + check_and_declare(ctx, reg); + } + } else { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register1d(reg, file, i); + check_and_declare(ctx, reg); + } } return TRUE; @@ -301,8 +429,7 @@ iter_immediate( struct tgsi_full_immediate *imm ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - - assert( ctx->num_imms < MAX_REGISTERS ); + scan_register *reg; /* No immediates allowed after the first instruction. */ @@ -311,12 +438,16 @@ iter_immediate( /* Mark the register as declared. */ - ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); + reg = MALLOC(sizeof(scan_register)); + fill_scan_register1d(reg, TGSI_FILE_IMMEDIATE, ctx->num_imms); + cso_hash_insert(ctx->regs_decl, scan_register_key(reg), reg); ctx->num_imms++; /* Check data type validity. 
*/ - if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32 && + imm->Immediate.DataType != TGSI_IMM_UINT32 && + imm->Immediate.DataType != TGSI_IMM_INT32) { report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType ); return TRUE; } @@ -330,8 +461,12 @@ iter_property( struct tgsi_iterate_context *iter, struct tgsi_full_property *prop ) { - /*struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;*/ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY && + prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) { + ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data); + } return TRUE; } @@ -340,7 +475,6 @@ epilog( struct tgsi_iterate_context *iter ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint file; /* There must be an END instruction somewhere. */ @@ -350,13 +484,17 @@ epilog( /* Check if all declared registers were used. 
*/ - for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { - uint i; - - for (i = 0; i < MAX_REGISTERS; i++) { - if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { - report_warning( ctx, "%s[%u]: Register never used", file_names[file], i ); + { + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); + + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) { + report_warning( ctx, "%s[%u]: Register never used", + file_names[reg->file], reg->indices[0] ); } + iter = cso_hash_iter_next(iter); } } @@ -368,6 +506,18 @@ epilog( return TRUE; } +static void +regs_hash_destroy(struct cso_hash *hash) +{ + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + iter = cso_hash_erase(hash, iter); + FREE(reg); + } + cso_hash_delete(hash); +} + boolean tgsi_sanity_check( const struct tgsi_token *tokens ) @@ -381,18 +531,23 @@ tgsi_sanity_check( ctx.iter.iterate_property = iter_property; ctx.iter.epilog = epilog; - memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); - memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); - memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); + ctx.regs_decl = cso_hash_create(); + ctx.regs_used = cso_hash_create(); + ctx.regs_ind_used = cso_hash_create(); + ctx.num_imms = 0; ctx.num_instructions = 0; ctx.index_of_END = ~0; ctx.errors = 0; ctx.warnings = 0; + ctx.implied_array_size = 0; if (!tgsi_iterate_shader( tokens, &ctx.iter )) return FALSE; + regs_hash_destroy(ctx.regs_decl); + regs_hash_destroy(ctx.regs_used); + regs_hash_destroy(ctx.regs_ind_used); return ctx.errors == 0; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 0f48b0dc3a1..a6cc773003a 100644 --- 
a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -229,8 +229,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) /* Do a whole bunch of checks for a simple move */ if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || - src->Register.File != TGSI_FILE_INPUT || - src->Register.File != TGSI_FILE_SYSTEM_VALUE || + (src->Register.File != TGSI_FILE_INPUT && + src->Register.File != TGSI_FILE_SYSTEM_VALUE) || dst->Register.File != TGSI_FILE_OUTPUT || src->Register.Index != dst->Register.Index || diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index d63c75dafb3..118059ace9c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2578,7 +2578,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: return 0; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index f000958bfc0..9fcffeda368 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -27,7 +27,9 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "pipe/p_defines.h" +#include "pipe/p_inlines.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" @@ -61,18 +63,20 @@ static boolean uprcase( char c ) } /* - * Ignore case of str1 and assume str2 is already uppercase. + * Ignore case of str1 and assume str1 is already uppercase. * Return TRUE iff str1 and str2 are equal. 
*/ static int streq_nocase_uprcase(const char *str1, const char *str2) { - while (*str1 && uprcase(*str1) == *str2) { + while (*str1 && *str2) { + if (*str1 != uprcase(*str2)) + return FALSE; str1++; str2++; } - return *str1 == *str2; + return TRUE; } static boolean str_match_no_case( const char **pcur, const char *str ) @@ -193,11 +197,26 @@ struct translate_ctx struct tgsi_token *tokens_cur; struct tgsi_token *tokens_end; struct tgsi_header *header; + unsigned processor : 4; + int implied_array_size : 5; }; static void report_error( struct translate_ctx *ctx, const char *msg ) { - debug_printf( "\nError: %s", msg ); + int line = 1; + int column = 1; + const char *itr = ctx->text; + + while (itr != ctx->cur) { + if (*itr == '\n') { + column = 1; + ++line; + } + ++column; + ++itr; + } + + debug_printf( "\nTGSI asm error: %s [%d : %d] \n", msg, line, column ); } /* Parse shader header. @@ -229,6 +248,7 @@ static boolean parse_header( struct translate_ctx *ctx ) if (ctx->tokens_cur >= ctx->tokens_end) return FALSE; *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); + ctx->processor = processor; return TRUE; } @@ -325,92 +345,36 @@ parse_opt_writemask( return TRUE; } -/* <register_file_bracket> ::= <file> `[' - */ static boolean -parse_register_file_bracket( - struct translate_ctx *ctx, - uint *file ) -{ - if (!parse_file( &ctx->cur, file )) { - report_error( ctx, "Unknown register file" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '[') { - report_error( ctx, "Expected `['" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} +parse_register_dst( struct translate_ctx *ctx, + uint *file, + int *index ); -/* <register_file_bracket_index> ::= <register_file_bracket> <uint> - */ -static boolean -parse_register_file_bracket_index( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - uint uindex; +struct parsed_src_bracket { + int index; - if (!parse_register_file_bracket( ctx, file )) - 
return FALSE; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - return TRUE; -} + uint ind_file; + int ind_index; + uint ind_comp; +}; -/* Parse destination register operand. - * <register_dst> ::= <register_file_bracket_index> `]' - */ -static boolean -parse_register_dst( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - if (!parse_register_file_bracket_index( ctx, file, index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} -/* Parse source register operand. - * <register_src> ::= <register_file_bracket_index> `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]' - */ static boolean -parse_register_src( +parse_register_src_bracket( struct translate_ctx *ctx, - uint *file, - int *index, - uint *ind_file, - int *ind_index, - uint *ind_comp) + struct parsed_src_bracket *brackets) { const char *cur; uint uindex; - *ind_comp = TGSI_SWIZZLE_X; - if (!parse_register_file_bracket( ctx, file )) - return FALSE; + memset(brackets, 0, sizeof(struct parsed_src_bracket)); + eat_opt_white( &ctx->cur ); + cur = ctx->cur; - if (parse_file( &cur, ind_file )) { - if (!parse_register_dst( ctx, ind_file, ind_index )) + if (parse_file( &cur, &brackets->ind_file )) { + if (!parse_register_dst( ctx, &brackets->ind_file, + &brackets->ind_index )) return FALSE; eat_opt_white( &ctx->cur ); @@ -420,16 +384,16 @@ parse_register_src( switch (uprcase(*ctx->cur)) { case 'X': - *ind_comp = TGSI_SWIZZLE_X; + brackets->ind_comp = TGSI_SWIZZLE_X; break; case 'Y': - *ind_comp = TGSI_SWIZZLE_Y; + brackets->ind_comp = TGSI_SWIZZLE_Y; break; 
case 'Z': - *ind_comp = TGSI_SWIZZLE_Z; + brackets->ind_comp = TGSI_SWIZZLE_Z; break; case 'W': - *ind_comp = TGSI_SWIZZLE_W; + brackets->ind_comp = TGSI_SWIZZLE_W; break; default: report_error(ctx, "Expected indirect register swizzle component `x', `y', `z' or `w'"); @@ -450,12 +414,12 @@ parse_register_src( return FALSE; } if (negate) - *index = -(int) uindex; + brackets->index = -(int) uindex; else - *index = (int) uindex; + brackets->index = (int) uindex; } else { - *index = 0; + brackets->index = 0; } } else { @@ -463,9 +427,9 @@ parse_register_src( report_error( ctx, "Expected literal unsigned integer" ); return FALSE; } - *index = (int) uindex; - *ind_file = TGSI_FILE_NULL; - *ind_index = 0; + brackets->index = (int) uindex; + brackets->ind_file = TGSI_FILE_NULL; + brackets->ind_index = 0; } eat_opt_white( &ctx->cur ); if (*ctx->cur != ']') { @@ -476,20 +440,123 @@ parse_register_src( return TRUE; } -/* Parse register declaration. - * <register_dcl> ::= <register_file_bracket_index> `]' | - * <register_file_bracket_index> `..' 
<index> `]' +static boolean +parse_opt_register_src_bracket( + struct translate_ctx *ctx, + struct parsed_src_bracket *brackets, + int *parsed_brackets) +{ + const char *cur = ctx->cur; + + *parsed_brackets = 0; + + eat_opt_white( &cur ); + if (cur[0] == '[') { + ++cur; + ctx->cur = cur; + + if (!parse_register_src_bracket(ctx, brackets)) + return FALSE; + + *parsed_brackets = 1; + } + + return TRUE; +} + +/* <register_file_bracket> ::= <file> `[' */ static boolean -parse_register_dcl( +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* <register_file_bracket_index> ::= <register_file_bracket> <uint> + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse source register operand. + * <register_src> ::= <register_file_bracket_index> `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' | + * <register_file_bracket> <register_dst> [`.' 
(`x' | `y' | `z' | `w')] `-' <uint> `]' + */ +static boolean +parse_register_src( struct translate_ctx *ctx, uint *file, - int *first, - int *last ) + struct parsed_src_bracket *brackets) +{ + + brackets->ind_comp = TGSI_SWIZZLE_X; + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + if (!parse_register_src_bracket( ctx, brackets )) + return FALSE; + + return TRUE; +} + +struct parsed_dcl_bracket { + uint first; + uint last; +}; + +static boolean +parse_register_dcl_bracket( + struct translate_ctx *ctx, + struct parsed_dcl_bracket *bracket) { - if (!parse_register_file_bracket_index( ctx, file, first )) + uint uindex; + memset(bracket, 0, sizeof(struct parsed_dcl_bracket)); + + eat_opt_white( &ctx->cur ); + + if (!parse_uint( &ctx->cur, &uindex )) { + /* it can be an empty bracket [] which means its range + * is from 0 to some implied size */ + if (ctx->cur[0] == ']' && ctx->implied_array_size != 0) { + bracket->first = 0; + bracket->last = ctx->implied_array_size - 1; + goto cleanup; + } + report_error( ctx, "Expected literal unsigned integer" ); return FALSE; + } + bracket->first = (int) uindex; + eat_opt_white( &ctx->cur ); + if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { uint uindex; @@ -499,12 +566,14 @@ parse_register_dcl( report_error( ctx, "Expected literal integer" ); return FALSE; } - *last = (int) uindex; + bracket->last = (int) uindex; eat_opt_white( &ctx->cur ); } else { - *last = *first; + bracket->last = bracket->first; } + +cleanup: if (*ctx->cur != ']') { report_error( ctx, "Expected `]' or `..'" ); return FALSE; @@ -513,6 +582,70 @@ parse_register_dcl( return TRUE; } +/* Parse register declaration. + * <register_dcl> ::= <register_file_bracket_index> `]' | + * <register_file_bracket_index> `..' 
<index> `]' + */ +static boolean +parse_register_dcl( + struct translate_ctx *ctx, + uint *file, + struct parsed_dcl_bracket *brackets, + int *num_brackets) +{ + const char *cur; + + *num_brackets = 0; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + if (!parse_register_dcl_bracket( ctx, &brackets[0] )) + return FALSE; + + *num_brackets = 1; + + cur = ctx->cur; + eat_opt_white( &cur ); + + if (cur[0] == '[') { + ++cur; + ctx->cur = cur; + if (!parse_register_dcl_bracket( ctx, &brackets[1] )) + return FALSE; + /* for geometry shader we don't really care about + * the first brackets it's always the size of the + * input primitive. so we want to declare just + * the index relevant to the semantics which is in + * the second bracket */ + if (ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + brackets[0] = brackets[1]; + } + *num_brackets = 2; + } + + return TRUE; +} + + +/* Parse destination register operand. + * <register_dst> ::= <register_file_bracket_index> `]' + */ +static boolean +parse_register_dst( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} static boolean parse_dst_operand( @@ -582,37 +715,44 @@ parse_src_operand( struct tgsi_full_src_register *src ) { uint file; - int index; - uint ind_file; - int ind_index; - uint ind_comp; uint swizzle[4]; boolean parsed_swizzle; + struct parsed_src_bracket bracket[2]; + int parsed_opt_brackets; if (*ctx->cur == '-') { ctx->cur++; eat_opt_white( &ctx->cur ); src->Register.Negate = 1; } - + if (*ctx->cur == '|') { ctx->cur++; eat_opt_white( &ctx->cur ); src->Register.Absolute = 1; } - if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp)) + if (!parse_register_src(ctx, &file, &bracket[0])) + return FALSE; + if (!parse_opt_register_src_bracket(ctx, 
&bracket[1], &parsed_opt_brackets)) return FALSE; + src->Register.File = file; - src->Register.Index = index; - if (ind_file != TGSI_FILE_NULL) { + src->Register.Index = bracket[0].index; + if (bracket[0].ind_file != TGSI_FILE_NULL) { src->Register.Indirect = 1; - src->Indirect.File = ind_file; - src->Indirect.Index = ind_index; - src->Indirect.SwizzleX = ind_comp; - src->Indirect.SwizzleY = ind_comp; - src->Indirect.SwizzleZ = ind_comp; - src->Indirect.SwizzleW = ind_comp; + src->Indirect.File = bracket[0].ind_file; + src->Indirect.Index = bracket[0].ind_index; + src->Indirect.SwizzleX = bracket[0].ind_comp; + src->Indirect.SwizzleY = bracket[0].ind_comp; + src->Indirect.SwizzleZ = bracket[0].ind_comp; + src->Indirect.SwizzleW = bracket[0].ind_comp; + } + if (parsed_opt_brackets) { + src->Register.Dimension = 1; + src->Dimension.Indirect = 0; + src->Dimension.Dimension = 0; + src->Dimension.Index = bracket[1].index; } /* Parse optional swizzle. @@ -791,7 +931,9 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] = "PSIZE", "GENERIC", "NORMAL", - "FACE" + "FACE", + "EDGEFLAG", + "PRIM_ID" }; static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = @@ -805,8 +947,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) { struct tgsi_full_declaration decl; uint file; - int first; - int last; + struct parsed_dcl_bracket brackets[2]; + int num_brackets; uint writemask; const char *cur; uint advance; @@ -818,7 +960,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - if (!parse_register_dcl( ctx, &file, &first, &last )) + if (!parse_register_dcl( ctx, &file, brackets, &num_brackets)) return FALSE; if (!parse_opt_writemask( ctx, &writemask )) return FALSE; @@ -826,8 +968,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl = tgsi_default_full_declaration(); decl.Declaration.File = file; decl.Declaration.UsageMask = writemask; - decl.Range.First = first; - 
decl.Range.Last = last; + decl.Range.First = brackets[0].first; + decl.Range.Last = brackets[0].last; cur = ctx->cur; eat_opt_white( &cur ); @@ -1027,7 +1169,7 @@ static boolean parse_property( struct translate_ctx *ctx ) } for (property_name = 0; property_name < TGSI_PROPERTY_COUNT; ++property_name) { - if (streq_nocase_uprcase(id, property_names[property_name])) { + if (streq_nocase_uprcase(property_names[property_name], id)) { break; } } @@ -1044,6 +1186,10 @@ static boolean parse_property( struct translate_ctx *ctx ) report_error( ctx, "Unknown primitive name as property!" ); return FALSE; } + if (property_name == TGSI_PROPERTY_GS_INPUT_PRIM && + ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + ctx->implied_array_size = u_vertices_per_prim(values[0]); + } break; default: if (!parse_uint(&ctx->cur, &values[0] )) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 2713372b059..2b51672b8ea 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -109,8 +109,13 @@ struct ureg_program unsigned nr_outputs; struct { - float v[4]; + union { + float f[4]; + unsigned u[4]; + int i[4]; + } value; unsigned nr; + unsigned type; } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; @@ -513,22 +518,22 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, } - - -static int match_or_expand_immediate( const float *v, - unsigned nr, - float *v2, - unsigned *nr2, - unsigned *swizzle ) +static int +match_or_expand_immediate( const unsigned *v, + unsigned nr, + unsigned *v2, + unsigned *pnr2, + unsigned *swizzle ) { + unsigned nr2 = *pnr2; unsigned i, j; - + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; - for (j = 0; j < *nr2 && !found; j++) { + for (j = 0; j < nr2 && !found; j++) { if (v[i] == v2[j]) { *swizzle |= j << (i * 2); found = TRUE; @@ -536,24 +541,28 @@ static int match_or_expand_immediate( const float *v, } if (!found) { - if (*nr2 >= 4) + if (nr2 >= 
4) { return FALSE; + } - v2[*nr2] = v[i]; - *swizzle |= *nr2 << (i * 2); - (*nr2)++; + v2[nr2] = v[i]; + *swizzle |= nr2 << (i * 2); + nr2++; } } + /* Actually expand immediate only when fully succeeded. + */ + *pnr2 = nr2; return TRUE; } - - -struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, - const float *v, - unsigned nr ) +static struct ureg_src +decl_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned nr, + unsigned type ) { unsigned i, j; unsigned swizzle; @@ -563,38 +572,82 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, */ for (i = 0; i < ureg->nr_immediates; i++) { - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + if (ureg->immediate[i].type != type) { + continue; + } + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { i = ureg->nr_immediates++; - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + ureg->immediate[i].type = type; + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } - set_bad( ureg ); + set_bad(ureg); out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. 
*/ - for (j = nr; j < 4; j++) + for (j = nr; j < 4; j++) { swizzle |= (swizzle & 0x3) << (j * 2); + } + + return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), + (swizzle >> 0) & 0x3, + (swizzle >> 2) & 0x3, + (swizzle >> 4) & 0x3, + (swizzle >> 6) & 0x3); +} + + +struct ureg_src +ureg_DECL_immediate( struct ureg_program *ureg, + const float *v, + unsigned nr ) +{ + union { + float f[4]; + unsigned u[4]; + } fu; + unsigned int i; + + for (i = 0; i < nr; i++) { + fu.f[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); +} + - return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), - (swizzle >> 0) & 0x3, - (swizzle >> 2) & 0x3, - (swizzle >> 4) & 0x3, - (swizzle >> 6) & 0x3); +struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32); +} + + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *ureg, + const int *v, + unsigned nr ) +{ + return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } @@ -818,8 +871,8 @@ ureg_insn(struct ureg_program *ureg, unsigned i; boolean saturate; boolean predicate; - boolean negate; - unsigned swizzle[4]; + boolean negate = FALSE; + unsigned swizzle[4] = { 0 }; saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; @@ -865,8 +918,8 @@ ureg_tex_insn(struct ureg_program *ureg, unsigned i; boolean saturate; boolean predicate; - boolean negate; - unsigned swizzle[4]; + boolean negate = FALSE; + unsigned swizzle[4] = { 0 }; saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? 
dst[0].Predicate : FALSE; @@ -982,21 +1035,23 @@ static void emit_decl_range( struct ureg_program *ureg, out[1].decl_range.Last = first + count - 1; } -static void emit_immediate( struct ureg_program *ureg, - const float *v ) +static void +emit_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned type ) { union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); out[0].value = 0; out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; out[0].imm.NrTokens = 5; - out[0].imm.DataType = TGSI_IMM_FLOAT32; + out[0].imm.DataType = type; out[0].imm.Padding = 0; - out[1].imm_data.Float = v[0]; - out[2].imm_data.Float = v[1]; - out[3].imm_data.Float = v[2]; - out[4].imm_data.Float = v[3]; + out[1].imm_data.Uint = v[0]; + out[2].imm_data.Uint = v[1]; + out[3].imm_data.Uint = v[2]; + out[4].imm_data.Uint = v[3]; } @@ -1091,7 +1146,8 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, - ureg->immediate[i].v ); + ureg->immediate[i].value.u, + ureg->immediate[i].type ); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 179862d4e27..38e2fd8d0a9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -154,6 +154,16 @@ ureg_DECL_immediate( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *, + const int *v, + unsigned nr ); + +struct ureg_src ureg_DECL_constant( struct ureg_program *, unsigned index ); @@ -227,6 +237,90 @@ ureg_imm1f( struct ureg_program *ureg, return ureg_DECL_immediate( ureg, v, 1 ); } +static INLINE struct ureg_src +ureg_imm4u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c, unsigned d) +{ + unsigned v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_uint( ureg, v, 4 ); +} + +static 
INLINE struct ureg_src +ureg_imm3u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c) +{ + unsigned v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_uint( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2u( struct ureg_program *ureg, + unsigned a, unsigned b) +{ + unsigned v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_uint( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1u( struct ureg_program *ureg, + unsigned a) +{ + return ureg_DECL_immediate_uint( ureg, &a, 1 ); +} + +static INLINE struct ureg_src +ureg_imm4i( struct ureg_program *ureg, + int a, int b, + int c, int d) +{ + int v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_int( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3i( struct ureg_program *ureg, + int a, int b, + int c) +{ + int v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_int( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2i( struct ureg_program *ureg, + int a, int b) +{ + int v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_int( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1i( struct ureg_program *ureg, + int a) +{ + return ureg_DECL_immediate_int( ureg, &a, 1 ); +} + /*********************************************************************** * Functions for patching up labels */ diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile deleted file mode 100644 index 3c82f8ae037..00000000000 --- a/src/gallium/auxiliary/translate/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = translate - -C_SOURCES = \ - translate_generic.c \ - translate_sse.c \ - translate.c \ - translate_cache.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript deleted file mode 100644 index 9553a675372..00000000000 --- a/src/gallium/auxiliary/translate/SConscript +++ /dev/null @@ -1,12 +0,0 @@ -Import('*') - -translate = env.ConvenienceLibrary( - target = 'translate', - source = [ - 'translate_generic.c', - 'translate_sse.c', - 'translate.c', - 'translate_cache.c', - ]) - -auxiliaries.insert(0, translate) diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile deleted file mode 100644 index 3ed90fd1b70..00000000000 --- a/src/gallium/auxiliary/util/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = util - -C_SOURCES = \ - u_debug.c \ - u_debug_dump.c \ - u_debug_symbol.c \ - u_debug_stack.c \ - u_blit.c \ - u_blitter.c \ - u_cache.c \ - u_cpu_detect.c \ - u_dl.c \ - u_draw_quad.c \ - u_format.c \ - u_format_access.c \ - u_format_table.c \ - u_gen_mipmap.c \ - u_handle_table.c \ - u_hash_table.c \ - u_hash.c \ - u_keymap.c \ - u_linear.c \ - u_network.c \ - u_math.c \ - u_mm.c \ - u_rect.c \ - u_simple_shaders.c \ - u_snprintf.c \ - u_stream_stdc.c \ - u_stream_wd.c \ - u_surface.c \ - u_texture.c \ - u_tile.c \ - u_time.c \ - u_timed_winsys.c \ - u_upload_mgr.c \ - u_simple_screen.c - -include ../../Makefile.template - -u_format_table.c: u_format_table.py u_format_parse.py u_format.csv - python u_format_table.py u_format.csv > $@ - -u_format_access.c: u_format_access.py u_format_parse.py u_format.csv - python u_format_access.py u_format.csv > $@ diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript deleted file mode 100644 index 2a546d19dc0..00000000000 --- a/src/gallium/auxiliary/util/SConscript +++ /dev/null @@ -1,61 
+0,0 @@ -Import('*') - -env.Clone() - -env.Append(CPPPATH = ['.']) - -env.CodeGenerate( - target = 'u_format_table.c', - script = 'u_format_table.py', - source = ['u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' -) - -env.CodeGenerate( - target = 'u_format_access.c', - script = 'u_format_access.py', - source = ['u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' -) - -util = env.ConvenienceLibrary( - target = 'util', - source = [ - 'u_bitmask.c', - 'u_blit.c', - 'u_blitter.c', - 'u_cache.c', - 'u_cpu_detect.c', - 'u_debug.c', - 'u_debug_dump.c', - 'u_debug_memory.c', - 'u_debug_stack.c', - 'u_debug_symbol.c', - 'u_dl.c', - 'u_draw_quad.c', - 'u_format.c', - 'u_format_access.c', - 'u_format_table.c', - 'u_gen_mipmap.c', - 'u_handle_table.c', - 'u_hash.c', - 'u_hash_table.c', - 'u_keymap.c', - 'u_network.c', - 'u_math.c', - 'u_mm.c', - 'u_rect.c', - 'u_simple_shaders.c', - 'u_snprintf.c', - 'u_stream_stdc.c', - 'u_stream_wd.c', - 'u_surface.c', - 'u_texture.c', - 'u_tile.c', - 'u_time.c', - 'u_timed_winsys.c', - 'u_upload_mgr.c', - 'u_simple_screen.c', - ]) - -auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/u_bitmask.c b/src/gallium/auxiliary/util/u_bitmask.c index 77587c07ec0..23c93a3ebcb 100644 --- a/src/gallium/auxiliary/util/u_bitmask.c +++ b/src/gallium/auxiliary/util/u_bitmask.c @@ -97,12 +97,12 @@ util_bitmask_resize(struct util_bitmask *bm, if(!minimum_size) return FALSE; - if(bm->size > minimum_size) + if(bm->size >= minimum_size) return TRUE; assert(bm->size % UTIL_BITMASK_BITS_PER_WORD == 0); new_size = bm->size; - while(!(new_size > minimum_size)) { + while(new_size < minimum_size) { new_size *= 2; /* Check integer overflow */ if(new_size < bm->size) @@ -136,7 +136,7 @@ util_bitmask_filled_set(struct util_bitmask *bm, unsigned index) { assert(bm->filled <= bm->size); - assert(index <= bm->size); + assert(index < bm->size); if(index == bm->filled) { ++bm->filled; @@ -149,7 +149,7 @@ 
util_bitmask_filled_unset(struct util_bitmask *bm, unsigned index) { assert(bm->filled <= bm->size); - assert(index <= bm->size); + assert(index < bm->size); if(index < bm->filled) bm->filled = index; @@ -182,7 +182,7 @@ util_bitmask_add(struct util_bitmask *bm) mask = 1; } found: - + /* grow the bitmask if necessary */ if(!util_bitmask_resize(bm, bm->filled)) return UTIL_BITMASK_INVALID_INDEX; @@ -198,9 +198,9 @@ unsigned util_bitmask_set(struct util_bitmask *bm, unsigned index) { - unsigned word = index / UTIL_BITMASK_BITS_PER_WORD; - unsigned bit = index % UTIL_BITMASK_BITS_PER_WORD; - util_bitmask_word mask = 1 << bit; + unsigned word; + unsigned bit; + util_bitmask_word mask; assert(bm); @@ -208,6 +208,10 @@ util_bitmask_set(struct util_bitmask *bm, if(!util_bitmask_resize(bm, index)) return UTIL_BITMASK_INVALID_INDEX; + word = index / UTIL_BITMASK_BITS_PER_WORD; + bit = index % UTIL_BITMASK_BITS_PER_WORD; + mask = 1 << bit; + bm->words[word] |= mask; util_bitmask_filled_set(bm, index); @@ -220,15 +224,19 @@ void util_bitmask_clear(struct util_bitmask *bm, unsigned index) { - unsigned word = index / UTIL_BITMASK_BITS_PER_WORD; - unsigned bit = index % UTIL_BITMASK_BITS_PER_WORD; - util_bitmask_word mask = 1 << bit; + unsigned word; + unsigned bit; + util_bitmask_word mask; assert(bm); if(index >= bm->size) return; + word = index / UTIL_BITMASK_BITS_PER_WORD; + bit = index % UTIL_BITMASK_BITS_PER_WORD; + mask = 1 << bit; + bm->words[word] &= ~mask; util_bitmask_filled_unset(bm, index); @@ -250,7 +258,7 @@ util_bitmask_get(struct util_bitmask *bm, return TRUE; } - if(index > bm->size) + if(index >= bm->size) return FALSE; if(bm->words[word] & mask) { diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 4e01123fff1..9b4e6ca2a73 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -454,7 +454,8 @@ debug_dump_flags(const struct debug_named_value *names, util_strncat(output, "|", 
sizeof(output)); else first = 0; - util_strncat(output, names->name, sizeof(output)); + util_strncat(output, names->name, sizeof(output) - 1); + output[sizeof(output) - 1] = '\0'; value &= ~names->value; } ++names; @@ -467,7 +468,8 @@ debug_dump_flags(const struct debug_named_value *names, first = 0; util_snprintf(rest, sizeof(rest), "0x%08lx", value); - util_strncat(output, rest, sizeof(output)); + util_strncat(output, rest, sizeof(output) - 1); + output[sizeof(output) - 1] = '\0'; } if(first) diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index abd834c741a..facc30a5534 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -188,7 +188,7 @@ void _debug_assert_fail(const char *expr, #ifdef DEBUG #define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__)) #else -#define debug_assert(expr) ((void)0) +#define debug_assert(expr) do { } while (0 && (expr)) #endif diff --git a/src/gallium/auxiliary/util/u_debug_memory.c b/src/gallium/auxiliary/util/u_debug_memory.c index 7623cb93981..d6484f4ad51 100644 --- a/src/gallium/auxiliary/util/u_debug_memory.c +++ b/src/gallium/auxiliary/util/u_debug_memory.c @@ -297,9 +297,9 @@ debug_memory_end(unsigned long start_no) if((start_no <= hdr->no && hdr->no < last_no) || (last_no < start_no && (hdr->no < last_no || start_no <= hdr->no))) { - debug_printf("%s:%u:%s: %u bytes at %p not freed\n", + debug_printf("%s:%u:%s: %lu bytes at %p not freed\n", hdr->file, hdr->line, hdr->function, - hdr->size, ptr); + (unsigned long) hdr->size, ptr); #if DEBUG_MEMORY_STACK debug_backtrace_dump(hdr->backtrace, DEBUG_MEMORY_STACK); #endif @@ -315,8 +315,8 @@ debug_memory_end(unsigned long start_no) } if(total_size) { - debug_printf("Total of %u KB of system memory apparently leaked\n", - (total_size + 1023)/1024); + debug_printf("Total of %lu KB of system memory apparently leaked\n", + (unsigned long) (total_size + 
1023)/1024); } else { debug_printf("No memory leaks detected.\n"); diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 090183fb174..a558923b2ed 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -119,7 +119,7 @@ enum util_format_colorspace { UTIL_FORMAT_COLORSPACE_RGB = 0, UTIL_FORMAT_COLORSPACE_SRGB = 1, UTIL_FORMAT_COLORSPACE_YUV = 2, - UTIL_FORMAT_COLORSPACE_ZS = 3, + UTIL_FORMAT_COLORSPACE_ZS = 3 }; diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 81aeb83cbb5..b2969a210a7 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -585,13 +585,12 @@ do { \ static INLINE uint32_t util_unsigned_fixed(float value, unsigned frac_bits) { - value *= (1<<frac_bits); - return value < 0 ? 0 : value; + return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits)); } static INLINE int32_t util_signed_fixed(float value, unsigned frac_bits) { - return value * (1<<frac_bits); + return (int32_t)(value * (1<<frac_bits)); } diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 6269c72e121..87ee0e47685 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include <winsock2.h> # include <windows.h> -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) # include <sys/socket.h> # include <netinet/in.h> # include <unistd.h> @@ -54,7 +54,7 @@ u_socket_close(int s) if (s < 0) return; -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) shutdown(s, SHUT_RDWR); close(s); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) @@ -117,7 +117,7 @@ u_socket_connect(const char *hostname, 
uint16_t port) if (!host) return -1; - memcpy((char *)&sa.sin_addr,host->h_addr,host->h_length); + memcpy((char *)&sa.sin_addr,host->h_addr_list[0],host->h_length); sa.sin_family= host->h_addrtype; sa.sin_port = htons(port); @@ -169,7 +169,7 @@ u_socket_listen_on_port(uint16_t portnum) void u_socket_block(int s, boolean block) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) int old = fcntl(s, F_GETFL, 0); if (old == -1) return; diff --git a/src/gallium/auxiliary/util/u_network.h b/src/gallium/auxiliary/util/u_network.h index 0aa898b9676..187dcab86e7 100644 --- a/src/gallium/auxiliary/util/u_network.h +++ b/src/gallium/auxiliary/util/u_network.h @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define PIPE_HAVE_SOCKETS -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) # define PIPE_HAVE_SOCKETS #endif diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 74343299623..10a874f3416 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -135,6 +135,39 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim ) } } +static INLINE unsigned +u_vertices_per_prim(int primitive) +{ + switch(primitive) { + case PIPE_PRIM_POINTS: + return 1; + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_LINE_STRIP: + return 2; + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + return 3; + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return 4; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return 6; + + /* following primitives should never be used + * with geometry shaders and their size is + * undefined */ + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_QUADS: + 
case PIPE_PRIM_QUAD_STRIP: + default: + debug_printf("Unrecognized geometry shader primitive"); + return 3; + } +} + const char *u_prim_name( unsigned pipe_prim ); #endif diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c index 5cd05b29047..4d976d6dca4 100644 --- a/src/gallium/auxiliary/util/u_stream_stdc.c +++ b/src/gallium/auxiliary/util/u_stream_stdc.c @@ -32,7 +32,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) #include <stdio.h> diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile deleted file mode 100644 index 4314c1e8d69..00000000000 --- a/src/gallium/auxiliary/vl/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = vl - -C_SOURCES = \ - vl_bitstream_parser.c \ - vl_mpeg12_mc_renderer.c \ - vl_compositor.c \ - vl_csc.c \ - vl_shader_build.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript deleted file mode 100644 index aed69f5efed..00000000000 --- a/src/gallium/auxiliary/vl/SConscript +++ /dev/null @@ -1,13 +0,0 @@ -Import('*') - -vl = env.ConvenienceLibrary( - target = 'vl', - source = [ - 'vl_bitstream_parser.c', - 'vl_mpeg12_mc_renderer.c', - 'vl_compositor.c', - 'vl_csc.c', - 'vl_shader_build.c', - ]) - -auxiliaries.insert(0, vl) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index ab196c21f87..caf581aca60 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -334,11 +334,13 @@ create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) free(tokens); } +#if 0 static void create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) { assert(false); } +#endif static void create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) @@ -442,11 +444,13 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) free(tokens); } +#if 0 static void create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) { assert(false); } +#endif static void create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) @@ -532,11 +536,13 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) free(tokens); } +#if 0 static void create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) { assert(false); } +#endif static void create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) @@ -658,11 +664,13 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) free(tokens); } +#if 0 static void create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) { assert(false); } +#endif static 
void xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) @@ -1081,6 +1089,9 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, assert(ycbcr_vb); assert(pos < r->macroblocks_per_batch); + mo_vec[1].x = 0; + mo_vec[1].y = 0; + switch (mb->mb_type) { case PIPE_MPEG12_MACROBLOCK_TYPE_BI: { diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 5cc1d4ddf81..01bea0f8cce 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -59,7 +59,7 @@ cell_map_constant_buffers(struct cell_context *sp) } } - draw_set_mapped_constant_buffer(sp->draw, + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], sp->constants[PIPE_SHADER_VERTEX].buffer->size); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index efc4f78364b..b723e794e71 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -66,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell ) vinfo->num_attribs = 0; /* we always want to emit vertex pos */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); @@ -82,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell ) break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0); #if 1 if (src < 0) /* XXX temp hack, try demos/fogcoord.c 
with this */ src = 0; @@ -100,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index ac5fafec1ad..5b87286d4c5 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -331,7 +331,7 @@ cell_emit_state(struct cell_context *cell) const struct draw_context *const draw = cell->draw; struct cell_shader_info info; - info.num_outputs = draw_num_vs_outputs(draw); + info.num_outputs = draw_num_shader_outputs(draw); info.declarations = (uintptr_t) draw->vs.machine.Declarations; info.num_declarations = draw->vs.machine.NumDeclarations; info.instructions = (uintptr_t) draw->vs.machine.Instructions; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 5ed330aa6ec..d86d8e09a51 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1681,7 +1681,7 @@ exec_instruction( } break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c index effeba12972..669964770d4 100644 --- a/src/gallium/drivers/i915/i915_buffer.c +++ b/src/gallium/drivers/i915/i915_buffer.c @@ -111,6 +111,7 @@ i915_buffer_unmap(struct pipe_screen *screen, { struct i915_buffer *buf = i915_buffer(buffer); assert(!buf->ibuf); + (void) buf; } static void diff --git a/src/gallium/drivers/i915/i915_context.c 
b/src/gallium/drivers/i915/i915_context.c index 94c8aee30fe..949f0463501 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -84,7 +84,7 @@ i915_draw_range_elements(struct pipe_context *pipe, } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, i915->current.constants[PIPE_SHADER_VERTEX], (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 4 * sizeof(float))); diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index e580b6c0f7f..1528afc8599 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -58,10 +58,10 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; -/* + /* case PIPE_TEX_WRAP_MIRRORED_REPEAT: return TEXCOORDMODE_MIRROR; -*/ + */ default: return TEXCOORDMODE_WRAP; } diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 178d4e8781d..03dd5091a61 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -84,7 +84,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* pos */ - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; @@ -101,21 +101,21 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* primary color */ if (colors[0]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_COLOR; } /* secondary color */ if (colors[1]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 
1); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; } /* fog coord, not fog blend factor */ if (fog) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; } @@ -125,7 +125,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) uint hwtc; if (texCoords[i]) { hwtc = TEXCOORDFMT_4D; - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } else { diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 58d9e56df27..d67a1a62633 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -83,19 +83,19 @@ compile_clip_prog( struct brw_context *brw, c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE; - if (c.key.output_color0) + if (c.key.output_color0 != BRW_OUTPUT_NOT_PRESENT) c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE; - if (c.key.output_color1) + if (c.key.output_color1 != BRW_OUTPUT_NOT_PRESENT) c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE; - if (c.key.output_bfc0) + if (c.key.output_bfc0 != BRW_OUTPUT_NOT_PRESENT) c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE; - if (c.key.output_bfc1) + if (c.key.output_bfc1 != BRW_OUTPUT_NOT_PRESENT) c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE; - if (c.key.output_edgeflag) + if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT) c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; if (BRW_IS_IGDNG(brw)) @@ -182,7 +182,6 @@ upload_clip_prog(struct brw_context *brw) */ /* CACHE_NEW_VS_PROG */ key.nr_attrs = brw->vs.prog_data->nr_outputs; - key.output_edgeflag 
= brw->vs.prog_data->output_edgeflag; /* PIPE_NEW_VS */ key.output_hpos = vs->output_hpos; @@ -190,6 +189,7 @@ upload_clip_prog(struct brw_context *brw) key.output_color1 = vs->output_color1; key.output_bfc0 = vs->output_bfc0; key.output_bfc1 = vs->output_bfc1; + key.output_edgeflag = vs->output_edgeflag; /* PIPE_NEW_CLIP */ key.nr_userclip = brw->curr.ucp.nr; diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 56e78074000..8c006bb95b2 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -120,6 +120,13 @@ #define BRW_MAX_CURBE (32*16) + +/* Need a value to say a particular vertex shader output isn't + * present. Limits us to 63 outputs currently. + */ +#define BRW_OUTPUT_NOT_PRESENT ((1<<6)-1) + + struct brw_context; struct brw_depth_stencil_state { @@ -335,8 +342,6 @@ struct brw_vs_prog_data { GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ - GLuint output_edgeflag; - GLboolean writes_psiz; /* Used for calculating urb partitions: diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h index 77d402d35e6..ba5b109c483 100644 --- a/src/gallium/drivers/i965/brw_disasm.h +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -23,6 +23,8 @@ #ifndef BRW_DISASM_H #define BRW_DISASM_H +#include <stdio.h> + struct brw_instruction; int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 4fe7b6acc16..00d8eaccbc4 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -860,7 +860,7 @@ void brw_land_fwd_jump(struct brw_compile *p, jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } diff --git 
a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index 211be881789..452e1e89f93 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -114,18 +114,18 @@ static void color_clear(struct brw_context *brw, const float *rgba ) { enum pipe_error ret; - unsigned value; + union util_color value; util_pack_color( rgba, bsurface->base.format, &value ); if (bsurface->cpp == 2) - value |= value << 16; + value.ui |= value.ui << 16; - ret = try_clear( brw, bsurface, value ); + ret = try_clear( brw, bsurface, value.ui ); if (ret != 0) { brw_context_flush( brw ); - ret = try_clear( brw, bsurface, value ); + ret = try_clear( brw, bsurface, value.ui ); assert( ret == 0 ); } } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index 6b03094f502..5d4e5025f97 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -3,6 +3,7 @@ #include "pipe/p_state.h" #include "brw_context.h" +#include "brw_debug.h" /** * called from intelDrawBuffer() @@ -51,8 +52,14 @@ static void brw_set_viewport_state( struct pipe_context *pipe, struct brw_context *brw = brw_context(pipe); brw->curr.viewport = *viewport; - brw->curr.ccv.min_depth = 0.0; /* XXX: near */ - brw->curr.ccv.max_depth = 1.0; /* XXX: far */ + brw->curr.ccv.min_depth = viewport->scale[2] * -1.0 + viewport->translate[2]; + brw->curr.ccv.max_depth = viewport->scale[2] * 1.0 + viewport->translate[2]; + + if (0) + debug_printf("%s depth range %f .. 
%f\n", + __FUNCTION__, + brw->curr.ccv.min_depth, + brw->curr.ccv.max_depth); brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; } diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 20f20571f65..bb32d90e331 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -197,6 +197,13 @@ static void *brw_create_vs_state( struct pipe_context *pipe, vs->id = brw->program_id++; vs->has_flow_control = has_flow_control(&vs->info); + vs->output_hpos = BRW_OUTPUT_NOT_PRESENT; + vs->output_color0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_color1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_edgeflag = BRW_OUTPUT_NOT_PRESENT; + for (i = 0; i < vs->info.num_outputs; i++) { int index = vs->info.output_semantic_index[i]; switch (vs->info.output_semantic_name[i]) { diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 714def5046d..8a16205d2f6 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -79,18 +79,12 @@ static void release_tmps( struct brw_vs_compile *c ) static boolean is_position_output( struct brw_vs_compile *c, unsigned vs_output ) { - struct brw_vertex_shader *vs = c->vp; - - if (vs_output == c->prog_data.output_edgeflag) { - return FALSE; - } - else { - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; - return (semantic == TGSI_SEMANTIC_POSITION && - index == 0); - } + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); } @@ -98,23 +92,16 @@ static boolean find_output_slot( struct brw_vs_compile *c, unsigned vs_output, unsigned *fs_input_slot ) { - struct 
brw_vertex_shader *vs = c->vp; + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; - if (vs_output == c->prog_data.output_edgeflag) { - *fs_input_slot = c->key.fs_signature.nr_inputs; - return TRUE; - } - else { - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - unsigned i; - - for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { - if (c->key.fs_signature.input[i].semantic == semantic && + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && c->key.fs_signature.input[i].semantic_index == index) { - *fs_input_slot = i; - return TRUE; - } + *fs_input_slot = i; + return TRUE; } } diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index de6156795d3..3ca676647c7 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'): env.Tool('udis86') +env.Append(CPPPATH = ['.']) + env.CodeGenerate( target = 'lp_tile_soa.c', script = 'lp_tile_soa.py', @@ -74,7 +76,7 @@ llvmpipe = env.ConvenienceLibrary( env = env.Clone() -env.Prepend(LIBS = [llvmpipe] + auxiliaries) +env.Prepend(LIBS = [llvmpipe] + gallium) tests = [ 'format', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index d14f468ba93..ced7b9c11d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, enum lp_build_blend_swizzle { LP_BUILD_BLEND_SWIZZLE_RGBA = 0, - LP_BUILD_BLEND_SWIZZLE_AAAA = 1, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1 }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c 
b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index dcc25fbff86..25c10af29f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -47,7 +47,7 @@ */ enum lp_build_flow_construct_kind { lP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP, + LP_BUILD_FLOW_SKIP }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index 5836e0173f9..10e82f120bb 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -130,7 +130,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); - // UIToFP can't be expressed in SSE2 + /* UIToFP can't be expressed in SSE2 */ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); if (normalized) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index a67c70ff25a..61b033c9fcf 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -321,7 +321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, { const uint unit = inst->Src[1].Register.Index; LLVMValueRef lodbias; - LLVMValueRef oow; + LLVMValueRef oow = NULL; LLVMValueRef coords[3]; unsigned num_coords; unsigned i; @@ -446,7 +446,12 @@ emit_instruction( { unsigned chan_index; LLVMValueRef src0, src1, src2; - LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + LLVMValueRef tmp0, tmp1, tmp2; + LLVMValueRef tmp3 = NULL; + LLVMValueRef tmp4 = NULL; + LLVMValueRef tmp5 = NULL; + LLVMValueRef tmp6 = NULL; + LLVMValueRef tmp7 = NULL; LLVMValueRef res; LLVMValueRef dst0[NUM_CHANNELS]; @@ -1310,7 +1315,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: /* deprecated? 
*/ assert(0); return 0; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 001311e7031..37587d4f792 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -140,6 +140,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); unsigned i; + /* check if any of the bound drawing surfaces are this texture */ if(llvmpipe->dirty_render_cache) { for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { if(llvmpipe->framebuffer.cbufs[i] && @@ -150,6 +151,13 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, llvmpipe->framebuffer.zsbuf->texture == texture) return PIPE_REFERENCED_FOR_WRITE; } + + /* check if any of the tex_cache textures are this texture */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (llvmpipe->tex_cache[i] && + llvmpipe->tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { if (llvmpipe->vertex_tex_cache[i] && llvmpipe->vertex_tex_cache[i]->texture == texture) diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 2299566c665..a96c2cad9dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -103,7 +103,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_arrays(draw, mode, start, count); /* - * unmap vertex/index buffers - will cause draw module to flush + * unmap vertex/index buffers */ for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); @@ -112,6 +112,12 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) 
+ */ + draw_flush(draw); /* Note: leave drawing surfaces mapped */ diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index 4abff4ecccc..e8e2e2524ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -128,6 +128,7 @@ lp_vbuf_unmap_vertices(struct vbuf_render *vbr, { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + (void) cvbr; /* do nothing */ } diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index b2e75d3b14e..a94cd05ef20 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_debug_dump.h" +#include "draw/draw_context.h" #include "lp_screen.h" #include "lp_context.h" #include "lp_state.h" @@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->blend == blend) + return; + + draw_flush(llvmpipe->draw); + llvmpipe->blend = blend; llvmpipe->dirty |= LP_NEW_BLEND; @@ -69,6 +75,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); unsigned i, j; + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; + + draw_flush(llvmpipe->draw); + memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); if(!llvmpipe->jit_context.blend_color) @@ -99,7 +110,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + if (llvmpipe->depth_stencil == depth_stencil) + return; + + draw_flush(llvmpipe->draw); + + 
llvmpipe->depth_stencil = depth_stencil; if(llvmpipe->depth_stencil) llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index e703964aaa8..acfd7be5f74 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -66,7 +66,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* compute vertex layout now */ const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); + const uint num = draw_current_shader_outputs(llvmpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -116,13 +116,13 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, + src = draw_find_shader_output(llvmpipe->draw, lpfs->info.input_semantic_name[i], lpfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, + llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (llvmpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 22683ff8b42..f2b8c362644 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -673,7 +673,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->fs = (struct lp_fragment_shader *) fs; + if (llvmpipe->fs == fs) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->fs = fs; llvmpipe->dirty |= LP_NEW_FS; } @@ -688,6 +693,7 @@ 
llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) struct lp_fragment_shader_variant *variant; assert(fs != llvmpipe->fs); + (void) llvmpipe; variant = shader->variants; while(variant) { @@ -723,8 +729,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); - if(shader == PIPE_SHADER_VERTEX) - draw_flush(llvmpipe->draw); + draw_flush(llvmpipe->draw); /* note: reference counting */ pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); @@ -734,7 +739,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, } if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, + data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 4561c6b8456..aa3b5a3f91e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -41,14 +41,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, } void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(llvmpipe->draw, setup); + draw_set_rasterizer_state(llvmpipe->draw, rasterizer); - llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + llvmpipe->rasterizer = rasterizer; llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index ba970cac985..e37ff04f3df 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -51,6 +51,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context 
*pipe, struct llvmpipe_context *lp = llvmpipe_context(pipe); uint i; + draw_flush(lp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 8a761648e7e..884e3878e62 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -70,14 +70,18 @@ fail: void -llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs; - llvmpipe->vs = (const struct lp_vertex_shader *)vs; + if (llvmpipe->vs == vs) + return; - draw_bind_vertex_shader(llvmpipe->draw, - (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL)); + draw_bind_vertex_shader(llvmpipe->draw, + vs ? vs->draw_data : NULL); + + llvmpipe->vs = vs; llvmpipe->dirty |= LP_NEW_VS; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 968c7a2d4aa..faddfb96779 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -330,7 +330,7 @@ test_one(unsigned verbose, fprintf(stderr, "conv.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n"); firsttime = FALSE; - //abort(); + /* abort(); */ } } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h index 9fa6c368125..05fded78e16 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.h @@ -115,7 +115,7 @@ extern const struct llvmpipe_cached_tex_tile * lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, union tex_tile_address addr ); -static INLINE const union tex_tile_address +static INLINE union tex_tile_address tex_tile_address( unsigned x, 
unsigned y, unsigned z, diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 0d01c07fb5e..68520fa4f09 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1085,7 +1085,7 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, const struct pipe_sampler_state *sampler = samp->sampler; unsigned level0, level1, j, imgFilter; int width, height; - float levelBlend; + float levelBlend = 0.0f; choose_mipmap_levels(tgsi_sampler, s, t, p, lodbias, @@ -1241,7 +1241,7 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, /* get/map pipe_surfaces corresponding to 3D tex slices */ unsigned level0, level1, j, imgFilter; int width, height, depth; - float levelBlend; + float levelBlend = 0.0f; const uint face = 0; choose_mipmap_levels(tgsi_sampler, s, t, p, diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 040b01865dd..19d00b58d37 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -29,7 +29,7 @@ #define LP_TILE_SOA_H #include "pipe/p_compiler.h" -#include "tgsi/tgsi_exec.h" // for NUM_CHANNELS +#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ #ifdef __cplusplus diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h index 595481c2cbc..74b472b6531 100644 --- a/src/gallium/drivers/llvmpipe/lp_winsys.h +++ b/src/gallium/drivers/llvmpipe/lp_winsys.h @@ -35,7 +35,7 @@ #define LP_WINSYS_H -#include "pipe/p_compiler.h" // for boolean +#include "pipe/p_compiler.h" /* for boolean */ #include "pipe/p_format.h" diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index e4cf91c005c..0437af3725c 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -31,7 +31,7 @@ nouveau_screen_bo_skel(struct 
pipe_screen *pscreen, struct nouveau_bo *bo, unsigned alignment, unsigned usage, unsigned size) { struct pipe_buffer *pb; - + pb = CALLOC(1, sizeof(struct pipe_buffer)+sizeof(struct nouveau_bo *)); if (!pb) { nouveau_bo_ref(NULL, &bo); @@ -239,5 +239,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) void nouveau_screen_fini(struct nouveau_screen *screen) { + nouveau_channel_free(&screen->channel); } diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 42c77e5e778..4c3e08a43f5 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -23,6 +23,9 @@ #define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17) #define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18) +/* use along with GPU_WRITE for 2D-only writes */ +#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19) + extern struct pipe_screen * nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *); diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 4b33636b2eb..770733a4a17 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -31,26 +31,26 @@ static boolean nv04_init_hwctx(struct nv04_context *nv04) { // requires a valid handle -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1); +// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); // OUT_RING(0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); OUT_RING(0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); OUT_RING(0x40182800); // OUT_RING(1<<20/*no cull*/); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); // OUT_RING(0x24|(1<<6)|(1<<8)); OUT_RING(0x120001a4); - BEGIN_RING(fahrenheit, 
NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); OUT_RING(0x332213a1); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); OUT_RING(0x11001010); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); OUT_RING(0x0); -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1); +// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); // OUT_RING(SCREEN_OFFSET); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); OUT_RING(0xff000000); diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c index 0cce71ad1de..c152b52119a 100644 --- a/src/gallium/drivers/nv04/nv04_fragtex.c +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -4,7 +4,7 @@ #define _(m,tf) \ { \ PIPE_FORMAT_##m, \ - NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ + NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ } struct nv04_texture_format { @@ -53,14 +53,14 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit) return; } - nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER + nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER + | NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER | nv04_fragtex_format(pt->format) - | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) - | ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) - | ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE + | ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) + | ( log2i(pt->width0) << 
NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE ; } diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index f6458232ae5..25395edfd71 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -93,7 +93,7 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render, static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); + BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); OUT_RINGp(buffer + VERTEX_SIZE * v0,8); OUT_RINGp(buffer + VERTEX_SIZE * v1,8); OUT_RINGp(buffer + VERTEX_SIZE * v2,8); @@ -105,7 +105,7 @@ static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buf static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); + BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); OUT_RINGp(buffer + VERTEX_SIZE * v0,8); OUT_RINGp(buffer + VERTEX_SIZE * v1,8); OUT_RINGp(buffer + VERTEX_SIZE * v2,8); @@ -114,7 +114,7 @@ static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buff static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); + BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); OUT_RINGp(buffer + VERTEX_SIZE * v0,8); OUT_RINGp(buffer + VERTEX_SIZE * v1,8); OUT_RINGp(buffer + VERTEX_SIZE * v2,8); @@ -166,11 +166,11 @@ static void 
nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con if (numvert<3) break; - BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + BEGIN_RING( fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); for(j = 0; j<numvert; j++) OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); - BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 ); + BEGIN_RING_NI( fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); for(j = 0; j<numtri/2; j++ ) OUT_RING(striptbl[j]); if (numtri%2) @@ -185,7 +185,7 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const struct nv04_context* nv04 = render->nv04; int i,j; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); for(i = 1; i<nr_indices; i+=14) @@ -195,12 +195,12 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const if (numvert < 3) break; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); for(j=0;j<numvert;j++) OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); - BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2); + BEGIN_RING_NI(fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); for(j = 0; j<numtri/2; j++) OUT_RING(fantbl[j]); if (numtri%2) diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 170ce3eb7e5..7c5b6e8229a 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -119,6 +119,8 @@ nv04_screen_destroy(struct pipe_screen *pscreen) nouveau_grobj_free(&screen->fahrenheit); nv04_surface_2d_takedown(&screen->eng2d); + nouveau_screen_fini(&screen->base); + 
FREE(pscreen); } @@ -163,10 +165,10 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) fahrenheit_class = 0; sub3d_class = 0; } else if (dev->chipset >= 0x10) { - fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE; + fahrenheit_class = NV10_TEXTURED_TRIANGLE; sub3d_class = NV10_CONTEXT_SURFACES_3D; } else { - fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE; + fahrenheit_class=NV04_TEXTURED_TRIANGLE; sub3d_class = NV04_CONTEXT_SURFACES_3D; } diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index ef3005db5fc..e3dc4c5bf44 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -50,28 +50,28 @@ wrap_mode(unsigned wrap) { switch (wrap) { case PIPE_TEX_WRAP_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; break; case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; break; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; break; case PIPE_TEX_WRAP_CLAMP: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_CLAMP: default: NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; } - return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; + return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; } static void * @@ -84,20 +84,20 @@ 
nv04_sampler_state_create(struct pipe_context *pipe, ss = MALLOC(sizeof(struct nv04_sampler_state)); - ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | - (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); + ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | + (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); if (cso->max_anisotropy > 1.0) { - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; } switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; break; } @@ -105,14 +105,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, case PIPE_TEX_FILTER_LINEAR: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; break; } break; @@ -120,14 +120,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, default: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= 
NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; break; } break; @@ -181,7 +181,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe, */ rs = MALLOC(sizeof(struct nv04_rasterizer_state)); - rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; + rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; return (void *)rs; } @@ -229,16 +229,16 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); hw->control = float_to_ubyte(cso->alpha.ref_value); - hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); - hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN; - hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0; - hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; - hw->control |= cso->depth.writemask ? 
(1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0; - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format + hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); + hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN; + hw->control |= cso->depth.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0; + hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; + hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0; + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format return (void *)hw; } @@ -377,7 +377,7 @@ nv04_set_scissor_state(struct pipe_context *pipe, /* struct nv04_context *nv04 = nv04_context(pipe); // XXX - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); OUT_RING (((s->maxx - s->minx) << 16) | s->minx); OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ } diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c index eb2c1c57c67..bd98ae091fd 100644 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -58,7 +58,7 @@ static void nv04_emit_control(struct nv04_context* nv04) { uint32_t control = nv04->dsa->control; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); OUT_RING(control); } @@ -75,7 +75,7 @@ static void nv04_emit_blend(struct nv04_context* nv04) 
blend|=(nv04_blend_func(nv04->blend->b_src)<<24); blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); OUT_RING(blend); } @@ -84,7 +84,7 @@ static void nv04_emit_sampler(struct nv04_context *nv04, int unit) struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; struct pipe_texture *pt = &nv04mt->base; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); OUT_RING(nv04->sampler[unit]->filter); @@ -163,7 +163,7 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (nv04->dirty & NV04_NEW_CONTROL) { nv04->dirty &= ~NV04_NEW_CONTROL; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); OUT_RING(nv04->dsa->control); } @@ -218,7 +218,7 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (!(nv04->fp_samplers & (1 << i))) continue; struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); } diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index 12df7fd1997..b24a9cee5ae 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -77,7 +77,7 @@ nv04_scaled_image_format(enum pipe_format format) } static INLINE unsigned 
-nv04_swizzle_bits(unsigned x, unsigned y) +nv04_swizzle_bits_square(unsigned x, unsigned y) { unsigned u = (x & 0x001) << 0 | (x & 0x002) << 1 | @@ -107,6 +107,15 @@ nv04_swizzle_bits(unsigned x, unsigned y) return v | u; } +/* rectangular swizzled textures are linear concatenations of swizzled square tiles */ +static INLINE unsigned +nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h) +{ + unsigned s = MIN2(w, h); + unsigned m = s - 1; + return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m); +} + static int nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, struct pipe_surface *dst, int dx, int dy, @@ -158,20 +167,19 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, for (x = 0; x < w; x += sub_w) { sub_w = MIN2(sub_w, w - x); - /* Must be 64-byte aligned */ - assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format)) & 63)); + assert(!(dst->offset & 63)); BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format), + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); OUT_RING (chan, nv04_scaled_image_format(src->format)); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 1 << 20); OUT_RING (chan, 1 << 20); @@ -491,3 +499,49 @@ nv04_surface_2d_init(struct 
nouveau_screen *screen) ctx->fill = nv04_surface_fill; return ctx; } + +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns) +{ + int temp_flags; + + // printf("creating temp, flags is %i!\n", flags); + + if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD) + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD; + } + else + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; + } + + struct nv40_screen* screen = (struct nv40_screen*)pscreen; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + + struct pipe_texture templ; + memset(&templ, 0, sizeof(templ)); + templ.format = ns->base.texture->format; + templ.target = PIPE_TEXTURE_2D; + templ.width0 = ns->base.width; + templ.height0 = ns->base.height; + templ.depth0 = 1; + templ.last_level = 0; + + // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented + templ.nr_samples = ns->base.texture->nr_samples; + + templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ); + struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags); + temp_ns->backing = ns; + + if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ) + eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height); + + return temp_ns; +} + diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h index 02b3f56ba8b..ce696a11a39 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.h +++ 
b/src/gallium/drivers/nv04/nv04_surface_2d.h @@ -4,6 +4,7 @@ struct nv04_surface { struct pipe_surface base; unsigned pitch; + struct nv04_surface* backing; }; struct nv04_surface_2d { @@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen); void nv04_surface_2d_takedown(struct nv04_surface_2d **); +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); + #endif diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index 8446073ae80..2dd2e146a8f 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -16,14 +16,14 @@ struct nv04_transfer { }; static void -nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv04_compatible_transfer_tex(pt, level, &tx_tex_template); + nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: 
Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -130,9 +132,9 @@ nv04_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index e3167814f2b..099ab100433 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -45,7 +45,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv04->constbuf[PIPE_SHADER_VERTEX], nv04->constbuf_nr[PIPE_SHADER_VERTEX]); diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index ee5901e743e..6a39ddeaacb 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -115,6 +115,9 @@ nv10_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->celsius); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } diff --git 
a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index c664973e904..eb04af9782e 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -16,14 +16,14 @@ struct nv10_transfer { }; static void -nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv10_compatible_transfer_tex(pt, level, &tx_tex_template); + nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -130,9 +132,9 @@ nv10_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, 
tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 441a4f75f3f..0d261412485 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -45,6 +45,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, } draw_set_mapped_constant_buffer(draw, + PIPE_SHADER_VERTEX, nv10->constbuf[PIPE_SHADER_VERTEX], nv10->constbuf_nr[PIPE_SHADER_VERTEX]); diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index d1291a92e0a..8f7538e7f57 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -6,6 +6,7 @@ #include "nv20_context.h" #include "nv20_screen.h" +#include "../nv04/nv04_surface_2d.h" static void nv20_miptree_layout(struct nv20_miptree *nv20mt) @@ -127,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. 
*/ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv20_miptree_layout(mt); mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size); @@ -183,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, ns->base.offset = nv20mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base; + return &ns->base; } static void nv20_miptree_surface_destroy(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv20_miptree_surface_destroy(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index 4eeacd1afd5..a0973f1ebdc 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -115,6 +115,9 @@ nv20_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->kelvin); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index 
0122b1c2cdb..63cba1f4122 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -228,7 +228,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) } /* always do position */ { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->hwfmt[0] |= (1 << 0); } @@ -237,19 +237,19 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 4; i < 6; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 3)); } if (colors[0]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 3); } if (colors[1]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 4); } @@ -258,7 +258,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 6; i < 10; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 1)); } @@ -267,7 +267,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 0; i < 4; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 9)); } @@ -276,13 +276,13 @@ static void 
nv20_vertex_layout(struct nv20_context *nv20) for (i = 10; i < 12; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 3)); } if (fog) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << 15); } diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 69b79c809f4..699773e8e6f 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -16,14 +16,14 @@ struct nv20_transfer { }; static void -nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv20_compatible_transfer_tex(pt, level, &tx_tex_template); + nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 
@@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv20_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c index 84d7db6c5e2..4bf461eba92 100644 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -45,7 +45,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv20->constbuf[PIPE_SHADER_VERTEX], nv20->constbuf_nr[PIPE_SHADER_VERTEX]); diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 46a821a48b1..38b39159f19 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ 
b/src/gallium/drivers/nv30/nv30_context.c @@ -25,6 +25,12 @@ static void nv30_destroy(struct pipe_context *pipe) { struct nv30_context *nv30 = nv30_context(pipe); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (nv30->state.hw[i]) + so_ref(NULL, &nv30->state.hw[i]); + } if (nv30->draw) draw_destroy(nv30->draw); diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 40965a97723..d1ff18e2dfb 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -435,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv30_sr(NV30SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV30_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV30_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -517,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SIN: @@ -870,6 
+886,12 @@ void nv30_fragprog_destroy(struct nv30_context *nv30, struct nv30_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index ce95d9700f6..8fbba38e78f 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -5,6 +5,7 @@ #include "util/u_math.h" #include "nv30_context.h" +#include "../nv04/nv04_surface_2d.h" static void nv30_miptree_layout(struct nv30_miptree *nv30mt) @@ -108,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv30_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, @@ -196,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = nv30mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. 
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv30_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv30_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 7cd36902eb4..760467f7367 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -156,6 +156,12 @@ static void nv30_screen_destroy(struct pipe_screen *pscreen) { struct nv30_screen *screen = nv30_screen(pscreen); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -163,6 +169,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->rankine); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 2255a02caed..65598991c68 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ 
b/src/gallium/drivers/nv30/nv30_transfer.c @@ -16,14 +16,14 @@ struct nv30_transfer { }; static void -nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv30_compatible_transfer_tex(pt, level, &tx_tex_template); + nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv30_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, 
tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index eb9cce4c786..d56c7a6b49c 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -25,6 +25,12 @@ static void nv40_destroy(struct pipe_context *pipe) { struct nv40_context *nv40 = nv40_context(pipe); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (nv40->state.hw[i]) + so_ref(NULL, &nv40->state.hw[i]); + } if (nv40->draw) draw_destroy(nv40->draw); diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index b2f19ecb699..3875bc35457 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -261,7 +261,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, map = pipe_buffer_map(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nv40->draw, map, nr); + draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, + map, nr); } draw_arrays(nv40->draw, mode, start, count); @@ -285,7 +286,7 @@ static INLINE void emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, unsigned semantic, unsigned index) { - unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); + unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index); unsigned a = nv40->swtnl.nr_attribs++; nv40->swtnl.hw[a] = hw; diff --git 
a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 1bf16726d10..bb9c85cc434 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -149,7 +149,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); break; case NV40SR_NONE: sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); @@ -445,10 +445,11 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv40_sr(NV40SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV40_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -573,13 +574,28 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, neg(swz(tmp, X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SEQ: @@ -752,7 +768,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) { struct 
tgsi_full_immediate *imm; float vals[4]; - + imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); @@ -836,7 +852,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, fp->insn[fpc->inst_offset + 1] = 0x00000000; fp->insn[fpc->inst_offset + 2] = 0x00000000; fp->insn[fpc->inst_offset + 3] = 0x00000000; - + fp->translated = TRUE; out_err: tgsi_parse_free(&parse); @@ -917,7 +933,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) update_constants: if (fp->nr_consts) { float *map; - + map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { @@ -948,6 +964,12 @@ void nv40_fragprog_destroy(struct nv40_context *nv40, struct nv40_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index b974e68a077..89bd155ff49 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -5,6 +5,7 @@ #include "util/u_math.h" #include "nv40_context.h" +#include "../nv04/nv04_surface_2d.h" @@ -105,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. 
*/ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv40_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); @@ -191,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv40_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv40_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index bd13dfddd1c..d01e7128051 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -140,6 +140,12 @@ static void nv40_screen_destroy(struct pipe_screen *pscreen) { struct nv40_screen *screen = nv40_screen(pscreen); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -147,6 
+153,7 @@ nv40_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->curie); + nv04_surface_2d_takedown(&screen->eng2d); nouveau_screen_fini(&screen->base); diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index b084a38b482..791ee6823d3 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -16,14 +16,14 @@ struct nv40_transfer { }; static void -nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv40_compatible_transfer_tex(pt, level, &tx_tex_template); + nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ 
nv40_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index d21b80eab8d..5997456e4c9 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -43,6 +43,39 @@ nv50_destroy(struct pipe_context *pipe) { struct nv50_context *nv50 = nv50_context(pipe); + if (nv50->state.fb) + so_ref(NULL, &nv50->state.fb); + if (nv50->state.blend) + so_ref(NULL, &nv50->state.blend); + if (nv50->state.blend_colour) + so_ref(NULL, &nv50->state.blend_colour); + if (nv50->state.zsa) + so_ref(NULL, &nv50->state.zsa); + if (nv50->state.rast) + so_ref(NULL, &nv50->state.rast); + if (nv50->state.stipple) + so_ref(NULL, &nv50->state.stipple); + if (nv50->state.scissor) + so_ref(NULL, &nv50->state.scissor); + if (nv50->state.viewport) + so_ref(NULL, &nv50->state.viewport); + if (nv50->state.tsc_upload) + so_ref(NULL, &nv50->state.tsc_upload); + if (nv50->state.tic_upload) + so_ref(NULL, &nv50->state.tic_upload); + if (nv50->state.vertprog) + so_ref(NULL, 
&nv50->state.vertprog); + if (nv50->state.fragprog) + so_ref(NULL, &nv50->state.fragprog); + if (nv50->state.programs) + so_ref(NULL, &nv50->state.programs); + if (nv50->state.vtxfmt) + so_ref(NULL, &nv50->state.vtxfmt); + if (nv50->state.vtxbuf) + so_ref(NULL, &nv50->state.vtxbuf); + if (nv50->state.vtxattr) + so_ref(NULL, &nv50->state.vtxattr); + draw_destroy(nv50->draw); FREE(nv50); } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 679c28ce4b1..2d0b1818ef6 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -154,26 +154,17 @@ struct nv50_pc { int if_lvl, loop_lvl; unsigned loop_pos[NV50_MAX_LOOP_NESTING]; + unsigned *insn_pos; /* actual program offset of each TGSI insn */ + boolean in_subroutine; + /* current instruction and total number of insns */ unsigned insn_cur; unsigned insn_nr; boolean allow32; -}; - -static INLINE struct nv50_reg * -reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) -{ - struct nv50_reg *ri; - assert(pc->reg_instance_nr < 16); - ri = &pc->reg_instances[pc->reg_instance_nr++]; - if (reg) { - *ri = *reg; - reg->mod = 0; - } - return ri; -} + uint8_t edgeflag_out; +}; static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) @@ -253,6 +244,21 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) assert(0); } +static INLINE struct nv50_reg * +reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) +{ + struct nv50_reg *ri; + + assert(pc->reg_instance_nr < 16); + ri = &pc->reg_instances[pc->reg_instance_nr++]; + if (reg) { + alloc_reg(pc, reg); + *ri = *reg; + reg->mod = 0; + } + return ri; +} + /* XXX: For shaders that aren't executed linearly (e.g. shaders that * contain loops), we need to assign all hw regs to TGSI TEMPs early, * lest we risk temp_temps overwriting regs alloc'd "later". 
@@ -279,22 +285,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) return NULL; } -/* Assign the hw of the discarded temporary register src - * to the tgsi register dst and free src. - */ -static void -assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - assert(src->index == -1 && src->hw != -1); - - if (dst->hw != -1) - pc->r_temp[dst->hw] = NULL; - pc->r_temp[src->hw] = dst; - dst->hw = src->hw; - - FREE(src); -} - /* release the hardware resource held by r */ static void release_hw(struct nv50_pc *pc, struct nv50_reg *r) @@ -451,10 +441,19 @@ is_immd(struct nv50_program_exec *e) return FALSE; } +static boolean +is_join(struct nv50_program_exec *e) +{ + if (is_long(e) && (e->inst[1] & 3) == 2) + return TRUE; + return FALSE; +} + static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) { + assert(!is_immd(e)); set_long(pc, e); e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); e->inst[1] |= (pred << 7) | (idx << 12); @@ -497,15 +496,6 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) static INLINE void set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { - union { - float f; - uint32_t ui; - } u; - u.ui = pc->immd_buf[imm->hw]; - - u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f; - u.f = (imm->mod & NV50_MOD_NEG) ? 
-u.f : u.f; - set_long(pc, e); /* XXX: can't be predicated - bits overlap; cases where both * are required should be avoided by using pc->allow32 */ @@ -513,8 +503,8 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) set_pred_wr(pc, 0, 0, e); e->inst[1] |= 0x00000002 | 0x00000001; - e->inst[0] |= (u.ui & 0x3f) << 16; - e->inst[1] |= (u.ui >> 6) << 2; + e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16; + e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2; } static INLINE void @@ -663,6 +653,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); } +/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */ static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -715,6 +706,34 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) FREE(imm); } +/* Assign the hw of the discarded temporary register src + * to the tgsi register dst and free src. 
+ */ +static void +assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + assert(src->index == -1 && src->hw != -1); + + if (pc->if_lvl || pc->loop_lvl || + (dst->type != P_TEMP) || + (src->hw < pc->result_nr * 4 && + pc->p->type == PIPE_SHADER_FRAGMENT) || + pc->p->info.opcode_count[TGSI_OPCODE_CAL] || + pc->p->info.opcode_count[TGSI_OPCODE_BRA]) { + + emit_mov(pc, dst, src); + free_temp(pc, src); + return; + } + + if (dst->hw != -1) + pc->r_temp[dst->hw] = NULL; + pc->r_temp[src->hw] = dst; + dst->hw = src->hw; + + FREE(src); +} + static void emit_nop(struct nv50_pc *pc) { @@ -886,7 +905,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_dst(pc, dst, e); set_src_0(pc, src0, e); if (src1->type == P_IMMD && !is_long(e)) { - if (src0->mod & NV50_MOD_NEG) + if (src0->mod ^ src1->mod) e->inst[0] |= 0x00008000; set_immd(pc, src1, e); } else { @@ -997,6 +1016,8 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, op != TGSI_OPCODE_XOR) assert(!"invalid bit op"); + assert(!(src0->mod | src1->mod)); + if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) { set_immd(pc, src1, e); if (op == TGSI_OPCODE_OR) @@ -1048,6 +1069,14 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, src2->mod ^= NV50_MOD_NEG; } +#define NV50_FLOP_RCP 0 +#define NV50_FLOP_RSQ 2 +#define NV50_FLOP_LG2 3 +#define NV50_FLOP_SIN 4 +#define NV50_FLOP_COS 5 +#define NV50_FLOP_EX2 6 + +/* rcp, rsqrt, lg2 support neg and abs */ static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) @@ -1055,17 +1084,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0x90000000; - if (sub) { + if (sub || src->mod) { set_long(pc, e); e->inst[1] |= (sub << 29); } set_dst(pc, dst, e); + set_src_0_restricted(pc, src, e); - if (sub == 0 || sub == 2) - set_src_0_restricted(pc, src, e); - else - set_src_0(pc, 
src, e); + assert(!src->mod || sub < 4); + + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; emit(pc, e); } @@ -1082,6 +1114,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29) | 0x00004000; + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1097,6 +1134,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29); + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1231,10 +1273,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, { struct nv50_reg *temp = alloc_temp(pc, NULL); - emit_flop(pc, 3, temp, v); + emit_flop(pc, NV50_FLOP_LG2, temp, v); emit_mul(pc, temp, temp, e); emit_preex2(pc, temp, temp); - emit_flop(pc, 6, dst, temp); + emit_flop(pc, NV50_FLOP_EX2, dst, temp); free_temp(pc, temp); } @@ -1336,66 +1378,53 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) } static struct nv50_program_exec * -emit_breakaddr(struct nv50_pc *pc) +emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc) { struct nv50_program_exec *e = exec(pc); - e->inst[0] = 0x40000002; + e->inst[0] = (op << 28) | 2; set_long(pc, e); + if (pred >= 0) + set_pred(pc, cc, pred, e); emit(pc, e); return e; } -static void -emit_break(struct nv50_pc *pc, int pred, unsigned cc) +static INLINE struct nv50_program_exec * +emit_breakaddr(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x50000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); + return emit_control_flow(pc, 0x4, -1, 0); +} - emit(pc, e); +static INLINE void +emit_break(struct nv50_pc *pc, int pred, unsigned cc) +{ + emit_control_flow(pc, 0x5, pred, cc); } -static struct 
nv50_program_exec * +static INLINE struct nv50_program_exec * emit_joinat(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xa0000002; - set_long(pc, e); - - emit(pc, e); - return e; + return emit_control_flow(pc, 0xa, -1, 0); } -static struct nv50_program_exec * +static INLINE struct nv50_program_exec * emit_branch(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); + return emit_control_flow(pc, 0x1, pred, cc); +} - e->inst[0] = 0x10000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - emit(pc, e); - return pc->p->exec_tail; +static INLINE struct nv50_program_exec * +emit_call(struct nv50_pc *pc, int pred, unsigned cc) +{ + return emit_control_flow(pc, 0x2, pred, cc); } -static void +static INLINE void emit_ret(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x30000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - - emit(pc, e); + emit_control_flow(pc, 0x3, pred, cc); } #define QOP_ADD 0 @@ -1458,7 +1487,7 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], if (arg == 4) /* there is no textureProj(samplerCubeShadow) */ emit_mov(pc, t[3], src[3]); - emit_flop(pc, 0, t[2], t[2]); + emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]); emit_mul(pc, t[0], src[0], t[2]); emit_mul(pc, t[1], src[1], t[2]); @@ -1476,7 +1505,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], t[3]->rhw = src[3]->rhw; emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID)); - emit_flop(pc, 0, t[3], t[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]); for (c = 0; c < dim; ++c) { t[c]->rhw = src[c]->rhw; @@ -1490,7 +1519,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], /* XXX: for some reason the blob sometimes uses MAD * (mad f32 $rX $rY $rZ neg $r63) */ - emit_flop(pc, 0, t[3], src[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]); for (c = 0; c < dim; ++c) emit_mul(pc, t[c], src[c], t[3]); 
if (arg != dim) /* depth reference value */ @@ -1537,7 +1566,13 @@ emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod, struct nv50_reg *src, struct nv50_program_exec *tex) { struct nv50_program_exec *join_at; - unsigned i, target = pc->p->exec_size + 7 * 2; + unsigned i, target = pc->p->exec_size + 9 * 2; + + if (pc->p->type != PIPE_SHADER_FRAGMENT) { + emit(pc, tex); + return; + } + pc->allow32 = FALSE; /* Subtract lod of each pixel from lod of top left pixel, jump * texlod insn if result is 0, then repeat for 2 other pixels. @@ -1663,6 +1698,7 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit(pc, e); } else if (bias_lod < 0) { + assert(pc->p->type == PIPE_SHADER_FRAGMENT); e->inst[0] |= arg << 22; e->inst[1] |= 0x20000000; /* texbias */ emit_mov(pc, t[arg], src[3]); @@ -1782,20 +1818,24 @@ static boolean negate_supported(const struct tgsi_full_instruction *insn, int i) { switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_COS: case TGSI_OPCODE_DDX: case TGSI_OPCODE_DDY: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: - case TGSI_OPCODE_MUL: + case TGSI_OPCODE_EX2: case TGSI_OPCODE_KIL: - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: + case TGSI_OPCODE_LG2: case TGSI_OPCODE_MAD: - return TRUE; + case TGSI_OPCODE_MUL: case TGSI_OPCODE_POW: - if (i == 1) - return TRUE; - return FALSE; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */ + case TGSI_OPCODE_SCS: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SUB: + return TRUE; default: return FALSE; } @@ -1820,7 +1860,9 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) case TGSI_OPCODE_DST: return mask & (c ? 
0xa : 0x6); case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: @@ -2042,6 +2084,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c) assert(0); return 0x0; } + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_LIT: case TGSI_OPCODE_SCS: case TGSI_OPCODE_TEX: @@ -2082,6 +2126,8 @@ nv50_kill_branch(struct nv50_pc *pc) if (pc->if_insn[lvl]->next != pc->p->exec_tail) return FALSE; + if (is_immd(pc->p->exec_tail)) + return FALSE; /* if ccode == 'true', the BRA is from an ELSE and the predicate * reg may no longer be valid, since we currently always use $p0 @@ -2215,10 +2261,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_BGNSUB: + assert(!pc->in_subroutine); + pc->in_subroutine = TRUE; + /* probably not necessary, but align to 8 byte boundary */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + break; case TGSI_OPCODE_BRK: assert(pc->loop_lvl > 0); emit_break(pc, -1, 0); break; + case TGSI_OPCODE_CAL: + assert(inst->Label.Label < pc->insn_nr); + emit_call(pc, -1, 0)->param.index = inst->Label.Label; + /* replaced by actual offset in nv50_program_fixup_insns */ + break; case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -2239,17 +2297,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */ } break; + case TGSI_OPCODE_CONT: + assert(pc->loop_lvl > 0); + emit_branch(pc, -1, 0)->param.index = + pc->loop_pos[pc->loop_lvl - 1]; + break; case TGSI_OPCODE_COS: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 5, dst[3], temp); + emit_flop(pc, NV50_FLOP_COS, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) temp = brdc = temp_temp(pc); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 5, brdc, temp); + emit_flop(pc, NV50_FLOP_COS, brdc, temp); 
break; case TGSI_OPCODE_DDX: for (c = 0; c < 4; c++) { @@ -2321,9 +2384,40 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDSUB: + assert(pc->in_subroutine); + pc->in_subroutine = FALSE; + break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); - emit_flop(pc, 6, brdc, temp); + emit_flop(pc, NV50_FLOP_EX2, brdc, temp); + break; + case TGSI_OPCODE_EXP: + { + struct nv50_reg *t[2]; + + assert(!temp); + t[0] = temp_temp(pc); + t[1] = temp_temp(pc); + + if (mask & 0x6) + emit_mov(pc, t[0], src[0][0]); + if (mask & 0x3) + emit_flr(pc, t[1], src[0][0]); + + if (mask & (1 << 1)) + emit_sub(pc, dst[1], t[0], t[1]); + if (mask & (1 << 0)) { + emit_preex2(pc, t[1], t[1]); + emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]); + } + if (mask & (1 << 2)) { + emit_preex2(pc, t[0], t[0]); + emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -2363,7 +2457,35 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - emit_flop(pc, 3, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]); + break; + case TGSI_OPCODE_LOG: + { + struct nv50_reg *t[2]; + + t[0] = temp_temp(pc); + if (mask & (1 << 1)) + t[1] = temp_temp(pc); + else + t[1] = t[0]; + + emit_abs(pc, t[0], src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], t[1]); + emit_flr(pc, t[1], t[1]); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], t[1]); + if (mask & (1 << 1)) { + t[1]->mod = NV50_MOD_NEG; + emit_preex2(pc, t[1], t[1]); + t[1]->mod = 0; + emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]); + emit_mul(pc, dst[1], t[0], t[1]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } break; case TGSI_OPCODE_LRP: temp = temp_temp(pc); @@ -2413,24 +2535,25 @@ 
nv50_program_tx_insn(struct nv50_pc *pc, emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: - emit_flop(pc, 0, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: - if (pc->p->type == PIPE_SHADER_FRAGMENT) + if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine) nv50_fp_move_results(pc); emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: - emit_flop(pc, 2, brdc, src[0][0]); + src[0][0]->mod |= NV50_MOD_ABS; + emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); break; case TGSI_OPCODE_SCS: temp = temp_temp(pc); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) - emit_flop(pc, 5, dst[0], temp); + emit_flop(pc, NV50_FLOP_COS, dst[0], temp); if (mask & (1 << 1)) - emit_flop(pc, 4, dst[1], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[1], temp); if (mask & (1 << 2)) emit_mov_immdval(pc, dst[2], 0.0); if (mask & (1 << 3)) @@ -2439,14 +2562,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_SIN: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 4, dst[3], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) temp = brdc = temp_temp(pc); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 4, brdc, temp); + emit_flop(pc, NV50_FLOP_SIN, brdc, temp); break; case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: @@ -2510,6 +2633,17 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mov_immdval(pc, dst[3], 1.0); break; case TGSI_OPCODE_END: + if (pc->p->type == PIPE_SHADER_FRAGMENT) + nv50_fp_move_results(pc); + + /* last insn must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + else + if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail)) + emit_nop(pc); + + pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -2554,10 +2688,16 @@ prep_inspect_insn(struct nv50_pc *pc, const 
struct tgsi_full_instruction *insn) mask = dst->WriteMask; if (dst->File == TGSI_FILE_TEMPORARY) - reg = pc->temp; + reg = pc->temp; else - if (dst->File == TGSI_FILE_OUTPUT) - reg = pc->result; + if (dst->File == TGSI_FILE_OUTPUT) { + reg = pc->result; + + if (insn->Instruction.Opcode == TGSI_OPCODE_MOV && + dst->Index == pc->edgeflag_out && + insn->Src[0].Register.File == TGSI_FILE_INPUT) + pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index; + } if (reg) { for (c = 0; c < 4; c++) { @@ -2724,7 +2864,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } pc->r_brdc = NULL; - if (!deqs) + if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3])) return nv50_program_tx_insn(pc, &insn); deqs = nv50_revdep_reorder(m, rdep); @@ -2775,7 +2915,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); - emit_flop(pc, 0, iv, iv); + emit_flop(pc, NV50_FLOP_RCP, iv, iv); /* XXX: when loading interpolants dynamically, move these * to the program head, or make sure it can't be skipped. 
@@ -2856,6 +2996,9 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (p->cfg.io_nr > first) p->cfg.io_nr = first; break; + case TGSI_SEMANTIC_EDGEFLAG: + pc->edgeflag_out = first; + break; /* case TGSI_SEMANTIC_CLIP_DISTANCE: p->cfg.clpd = MIN2(p->cfg.clpd, first); @@ -3104,6 +3247,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; @@ -3192,16 +3337,6 @@ nv50_program_fixup_insns(struct nv50_pc *pc) if (e->param.index >= 0 && !e->param.mask) bra_list[n++] = e; - /* last instruction must be long so it can have the exit bit set */ - if (!is_long(pc->p->exec_tail)) - convert_to_long(pc, pc->p->exec_tail); - /* set exit bit */ - pc->p->exec_tail->inst[1] |= 1; - - /* !immd on exit insn simultaneously means !join */ - assert(!is_immd(pc->p->exec_head)); - assert(!is_immd(pc->p->exec_tail)); - /* Make sure we don't have any single 32 bit instructions. */ for (e = pc->p->exec_head, pos = 0; e; e = e->next) { pos += is_long(e) ? 
2 : 1; @@ -3210,12 +3345,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc) for (i = 0; i < n; ++i) if (bra_list[i]->param.index >= pos) bra_list[i]->param.index += 1; + for (i = 0; i < pc->insn_nr; ++i) + if (pc->insn_pos[i] >= pos) + pc->insn_pos[i] += 1; convert_to_long(pc, e); ++pos; } } FREE(bra_list); + + if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL]) + return; + + /* fill in CALL offsets */ + for (e = pc->p->exec_head; e; e = e->next) { + if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2) + e->param.index = pc->insn_pos[e->param.index]; + } } static boolean @@ -3237,19 +3384,20 @@ nv50_program_tx(struct nv50_program *p) if (ret == FALSE) goto out_cleanup; + pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned)); + tgsi_parse_init(&parse, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { const union tgsi_full_token *tok = &parse.FullToken; - /* don't allow half insn/immd on first and last instruction */ + /* previously allow32 was FALSE for first & last instruction */ pc->allow32 = TRUE; - if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr) - pc->allow32 = FALSE; tgsi_parse_token(&parse); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: + pc->insn_pos[pc->insn_cur] = pc->p->exec_size; ++pc->insn_cur; ret = nv50_tgsi_insn(pc, tok); if (ret == FALSE) @@ -3260,9 +3408,6 @@ nv50_program_tx(struct nv50_program *p) } } - if (pc->p->type == PIPE_SHADER_FRAGMENT) - nv50_fp_move_results(pc); - nv50_program_fixup_insns(pc); p->param_nr = pc->param_nr * 4; @@ -3480,7 +3625,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); + so_method(so, tesla, NV50TCL_FP_CONTROL, 1); so_data (so, p->cfg.regs[2]); so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); so_data (so, p->cfg.regs[3]); @@ -3652,7 +3797,7 @@ nv50_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, 
NV50TCL_FP_INTERPOLANT_CTRL, 1); so_data (so, reg[4]); - so_method(so, tesla, 0x1540, 4); + so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4); so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 4a90c372ce3..461fec1d89c 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -58,6 +58,7 @@ struct nv50_program { /* VP only */ uint8_t clpd, clpd_nr; uint8_t psiz; + uint8_t edgeflag_in; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 268c9823f7d..5d9e18218ae 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_query *q = nv50_query(pq); - BEGIN_RING(chan, tesla, 0x1530, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1); OUT_RING (chan, 1); - BEGIN_RING(chan, tesla, 0x1514, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1); OUT_RING (chan, 1); q->ready = FALSE; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index d443ca3ad06..7e039ea82ec 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -128,7 +128,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; + return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case NOUVEAU_CAP_HW_VTXBUF: @@ -165,6 +165,21 @@ static void nv50_screen_destroy(struct pipe_screen *pscreen) { struct nv50_screen *screen = nv50_screen(pscreen); + unsigned i; + + for (i = 0; i < 2; i++) { + if (screen->constbuf_parm[i]) + nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); + } + + if (screen->constbuf_misc[0]) + 
nouveau_bo_ref(NULL, &screen->constbuf_misc[0]); + if (screen->tic) + nouveau_bo_ref(NULL, &screen->tic); + if (screen->tsc) + nouveau_bo_ref(NULL, &screen->tsc); + if (screen->static_init) + so_ref(NULL, &screen->static_init); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->tesla); @@ -231,8 +246,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) break; case 0x80: case 0x90: - /* this stupid name should be corrected. */ - tesla_class = NV54TCL; + tesla_class = NV84TCL; break; case 0xa0: switch (chipset) { @@ -242,7 +256,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) tesla_class = NVA0TCL; break; default: - tesla_class = 0x8597; + tesla_class = NVA8TCL; break; } break; @@ -287,7 +301,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, chan->vram->handle); so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); so_data (so, NV50_2D_OPERATION_SRCCOPY); - so_method(so, screen->eng2d, 0x0290, 1); + so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1); so_data (so, 0); so_method(so, screen->eng2d, 0x0888, 1); so_data (so, 1); @@ -297,34 +311,33 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Static tesla init */ so = so_new(256, 20); - so_method(so, screen->tesla, 0x1558, 1); - so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); + so_data (so, NV50TCL_COND_MODE_ALWAYS); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), - NV50TCL_DMA_UNK0__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11); + for (i = 0; i < 11; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), - NV50TCL_DMA_UNK1__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0), + NV50TCL_DMA_COLOR__SIZE); + for (i = 0; i < 
NV50TCL_DMA_COLOR__SIZE; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, 0x121c, 1); + so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1); so_data (so, 1); /* activate all 32 lanes (threads) in a warp */ - so_method(so, screen->tesla, 0x19a0, 1); + so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1); so_data (so, 0x2); so_method(so, screen->tesla, 0x1400, 1); so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, 0x13b4, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); so_data (so, 0x54); - so_method(so, screen->tesla, 0x13bc, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); so_data (so, 0x54); /* origin is top left (set to 1 for bottom left) */ - so_method(so, screen->tesla, 0x13ac, 1); + so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, 8); @@ -360,7 +373,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) // B = buffer ID (maybe more than 1 byte) // N = CB index used in shader instruction // P = program type (0 = VP, 2 = GP, 3 = FP) - so_method(so, screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x000BBNP1); */ @@ -424,23 +437,26 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { - so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2); + so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_data (so, 0x000000ff); so_data (so, 0xffffffff); } - so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2); so_data (so, fui(0.0)); so_data (so, fui(1.0)); /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */ - so_method(so, screen->tesla, 0x1234, 1); + so_method(so, screen->tesla, 
NV50TCL_LINKED_TSC, 1); so_data (so, 1); /* activate first scissor rectangle */ - so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1); + so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1); so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, 1); /* default edgeflag to TRUE */ + so_emit(chan, so); so_ref (so, &screen->static_init); so_ref (NULL, &so); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 88aef52d08c..30b2b0f91bf 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -295,7 +295,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : NV50TCL_SHADE_MODEL_SMOOTH); - so_method(so, tesla, 0x1684, 1); + so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1); so_data (so, cso->flatshade_first ? 0 : 1); so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); @@ -392,7 +392,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); so_data (so, fui(cso->offset_scale)); so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); - so_data (so, fui(cso->offset_units)); + so_data (so, fui(cso->offset_units * 2.0f)); } rso->pipe = *cso; @@ -439,9 +439,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } - /* XXX: keep hex values until header is updated (names reversed) */ if (cso->stencil[0].enabled) { - so_method(so, tesla, 0x1380, 8); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); @@ -451,23 +450,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, cso->stencil[0].writemask); so_data (so, cso->stencil[0].valuemask); } else { - so_method(so, tesla, 0x1380, 1); + 
so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, tesla, 0x1594, 5); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, tesla, 0x0f54, 3); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); so_data (so, cso->stencil[1].ref_value); so_data (so, cso->stencil[1].writemask); so_data (so, cso->stencil[1].valuemask); } else { - so_method(so, tesla, 0x1594, 1); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 871e8097b65..c8bdf9dc276 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -41,7 +41,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) * FP result 0 always goes to RT[0], bits 4 - 6 are ignored. * Ambiguous assignment results in no rendering (no DATA_ERROR). 
*/ - so_method(so, tesla, 0x121c, 1); + so_method(so, tesla, NV50TCL_RT_CONTROL, 1); so_data (so, fb->nr_cbufs | (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) | (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25)); @@ -87,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1224, 1); + so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1); so_data (so, 1); } @@ -124,22 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 1); so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3); so_data (so, fb->zsbuf->width); so_data (so, fb->zsbuf->height); so_data (so, 0x00010001); } else { - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 0); } - so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); so_data (so, w << 16); so_data (so, h << 16); /* set window lower left corner */ - so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2); + so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2); so_data (so, 0); so_data (so, 0); /* set screen scissor rectangle */ @@ -325,7 +325,7 @@ nv50_state_validate(struct nv50_context *nv50) nv50->state.scissor_enabled = rast->scissor; so = so_new(3, 0); - so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2); + so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); if (nv50->state.scissor_enabled) { so_data(so, (s->maxx << 16) | s->minx); so_data(so, (s->maxy << 16) | s->miny); @@ -355,11 +355,11 @@ scissor_uptodate: so = so_new(14, 0); if (!bypass) { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); so_data (so, fui(nv50->viewport.translate[0])); so_data (so, fui(nv50->viewport.translate[1])); so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, 
NV50TCL_VIEWPORT_SCALE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); so_data (so, fui(nv50->viewport.scale[0])); so_data (so, fui(nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); @@ -440,7 +440,7 @@ void nv50_so_init_sifc(struct nv50_context *nv50, so_data (so, 1); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2); + so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); so_data (so, 0); so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM); so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 79655fc08d5..6378132979e 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -176,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, if (ret) return; - BEGIN_RING(chan, eng2d, 0x0580, 3); - OUT_RING (chan, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3); + OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES); OUT_RING (chan, format); OUT_RING (chan, value); - BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4); OUT_RING (chan, destx); OUT_RING (chan, desty); OUT_RING (chan, width); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 4d9afa6fedc..a2f1db2914c 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -47,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); OUT_RING (chan, src_pitch); src_offset += (sy * src_pitch) + (sx * cpp); } else { @@ -66,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, 
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); OUT_RING (chan, dst_pitch); dst_offset += (dy * dst_pitch) + (dx * cpp); } else { @@ -89,7 +89,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RELOCh(chan, src_bo, src_offset, src_reloc); OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); + NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); if (src_bo->tile_flags) { @@ -107,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, dst_offset += (line_count * dst_pitch); } BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); + NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); OUT_RING (chan, width * cpp); OUT_RING (chan, line_count); OUT_RING (chan, 0x00000101); @@ -291,7 +291,7 @@ nv50_upload_sifc(struct nv50_context *nv50, /* NV50_2D_OPERATION_SRCCOPY assumed already set */ - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2); + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); OUT_RING (chan, 0); OUT_RING (chan, src_format); BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); @@ -334,6 +334,6 @@ nv50_upload_sifc(struct nv50_context *nv50, src += src_pitch; } - BEGIN_RING(chan, tesla, 0x1440, 1); + BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f7fa0659e8c..602adfc50de 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -99,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c) { static const uint32_t hw_values[] = { 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8, - 
NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16, 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 }; + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 }; /* we'd also have R11G11B10 and R10G10B10A2 */ @@ -198,7 +198,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, return nv50_push_elements_u08(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -208,7 +208,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 
2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -231,7 +231,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, return nv50_push_elements_u16(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -241,7 +241,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -266,7 +266,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, 0x400015e8, nr); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -372,6 +372,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so_data (so, fui(v[1])); break; case 1: + if (attrib == nv50->vertprog->cfg.edgeflag_in) { + so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, v[0] ? 
1 : 0); + } so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1); so_data (so, fui(v[0])); break; @@ -401,6 +405,9 @@ nv50_vbo_validate(struct nv50_context *nv50) !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; + if (nv50->vertprog->cfg.edgeflag_in < 16) + nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ + n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); vtxattr = NULL; @@ -445,7 +452,7 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); /* vertex array limits */ - so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2); + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_reloc (vtxbuf, bo, vb->buffer->size - 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -479,6 +486,9 @@ struct nv50_vbo_emitctx unsigned nr_ve; unsigned vtx_dwords; unsigned vtx_max; + + float edgeflag; + unsigned ve_edgeflag; }; static INLINE void @@ -622,6 +632,9 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, if (nv50_map_vbufs(nv50) == FALSE) return FALSE; + emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in; + + emit->edgeflag = 0.5f; emit->nr_ve = 0; emit->vtx_dwords = 0; @@ -644,7 +657,8 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, desc = util_format_description(ve->src_format); assert(desc); - size = util_format_get_component_bits(ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); + size = util_format_get_component_bits( + ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); assert(ve->nr_components > 0 && ve->nr_components <= 4); @@ -686,10 +700,31 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, } emit->vtx_max = 512 / emit->vtx_dwords; + if (emit->ve_edgeflag < 16) + emit->vtx_max = 1; return TRUE; } +static INLINE void +set_edgeflag(struct nouveau_channel *chan, + struct nouveau_grobj *tesla, + struct nv50_vbo_emitctx *emit, uint32_t index) +{ + unsigned i = emit->ve_edgeflag; + + if (i < 
16) { + float f = *((float *)(emit->map[i] + index * emit->stride[i])); + + if (emit->edgeflag != f) { + emit->edgeflag = f; + + BEGIN_RING(chan, tesla, 0x15e4, 1); + OUT_RING (chan, f ? 1 : 0); + } + } +} + static boolean nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) { @@ -704,6 +739,8 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx_next(chan, &emit); @@ -729,6 +766,8 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -754,6 +793,8 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -779,6 +820,8 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 0d2de17be93..183aa17f9b3 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -4,7 +4,12 @@ r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') env = env.Clone() # add the 
paths for r300compiler -env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa']) +env.Append(CPPPATH = [ + '#/src/mesa/drivers/dri/r300/compiler', + '#/src/gallium/winsys/drm/radeon/core', + '#/include', + '#/src/mesa', +]) r300 = env.ConvenienceLibrary( target = 'r300', diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 199ce3a945d..1dc9216a7b2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -997,7 +997,7 @@ validate: goto validate; } } else { - // debug_printf("No VBO while emitting dirty state!\n"); + /* debug_printf("No VBO while emitting dirty state!\n"); */ } if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); @@ -1129,7 +1129,7 @@ validate: */ /* Finally, emit the VBO. */ - //r300_emit_vertex_buffer(r300); + /* r300_emit_vertex_buffer(r300); */ r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index d8d08fbe264..0aa1da07f8b 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2638,7 +2638,7 @@ enum { VE_COND_MUX_GTE = 25, VE_SET_GREATER_THAN = 26, VE_SET_EQUAL = 27, - VE_SET_NOT_EQUAL = 28, + VE_SET_NOT_EQUAL = 28 }; enum { @@ -2672,20 +2672,20 @@ enum { ME_PRED_SET_CLR = 25, ME_PRED_SET_INV = 26, ME_PRED_SET_POP = 27, - ME_PRED_SET_RESTORE = 28, + ME_PRED_SET_RESTORE = 28 }; enum { /* R3XX */ PVS_MACRO_OP_2CLK_MADD = 0, - PVS_MACRO_OP_2CLK_M2X_ADD = 1, + PVS_MACRO_OP_2CLK_M2X_ADD = 1 }; enum { PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */ PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */ PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */ - PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */ + PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */ }; enum { @@ -2694,7 +2694,7 @@ enum { PVS_DST_REG_OUT = 2, /* Output Memory. 
Used for all outputs */ PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */ PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */ - PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */ + PVS_DST_REG_INPUT = 5 /* Output Memory & Replicate X to all channels */ }; enum { @@ -2703,7 +2703,7 @@ enum { PVS_SRC_SELECT_Z = 2, /* Select Z Component */ PVS_SRC_SELECT_W = 3, /* Select W Component */ PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */ - PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */ + PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */ }; /* PVS Opcode & Destination Operand Description */ @@ -2742,7 +2742,7 @@ enum { PVS_DST_ADDR_SEL_MASK = 0x3, PVS_DST_ADDR_SEL_SHIFT = 29, PVS_DST_ADDR_MODE_0_MASK = 0x1, - PVS_DST_ADDR_MODE_0_SHIFT = 31, + PVS_DST_ADDR_MODE_0_SHIFT = 31 }; /* PVS Source Operand Description */ @@ -2777,7 +2777,7 @@ enum { PVS_SRC_ADDR_SEL_MASK = 0x3, PVS_SRC_ADDR_SEL_SHIFT = 29, PVS_SRC_ADDR_MODE_1_MASK = 0x0, - PVS_SRC_ADDR_MODE_1_SHIFT = 32, + PVS_SRC_ADDR_MODE_1_SHIFT = 32 }; /*\}*/ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2d70ec2ac94..a89cb633e02 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -335,8 +335,9 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, draw_set_mapped_element_buffer(r300->draw, 0, NULL); draw_set_mapped_constant_buffer(r300->draw, - r300->shader_constants[PIPE_SHADER_VERTEX].constants, - r300->shader_constants[PIPE_SHADER_VERTEX].count * + PIPE_SHADER_VERTEX, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); draw_arrays(r300->draw, mode, start, count); @@ -361,6 +362,7 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); int i; + void* indices; if (!u_trim_pipe_prim(mode, 
&count)) { return FALSE; @@ -377,12 +379,13 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - void* indices = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(r300->draw, indexSize, minIndex, maxIndex, indices); draw_set_mapped_constant_buffer(r300->draw, + PIPE_SHADER_VERTEX, r300->shader_constants[PIPE_SHADER_VERTEX].constants, r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); @@ -474,7 +477,7 @@ static void* r300_render_map_vertices(struct vbuf_render* render) r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo, PIPE_BUFFER_USAGE_CPU_WRITE); - return (r300render->vbo_ptr + r300render->vbo_offset); + return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); } static void r300_render_unmap_vertices(struct vbuf_render* render, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 29bc701a86e..727ae7ade6d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -71,9 +71,9 @@ static void r300_draw_emit_attrib(struct r300_context* r300, struct tgsi_shader_info* info = &r300->vs->info; int output; - output = draw_find_vs_output(r300->draw, - info->output_semantic_name[index], - info->output_semantic_index[index]); + output = draw_find_shader_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); } diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 096cdb20bbe..a792c2cf989 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -120,7 +120,7 @@ static unsigned 
translate_opcode(unsigned opcode) /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */ /* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */ /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */ - /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */ + /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */ /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */ /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */ /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */ diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index f98087deb8c..5f130453c39 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,6 +36,7 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_tile_cache.h" @@ -55,6 +56,9 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, if (softpipe->no_rast) return; + if (!softpipe_check_render_cond(softpipe)) + return; + #if 0 softpipe_update_derived(softpipe); /* not needed?? 
*/ #endif diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 969d69d6b42..3ac807d4b5c 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -176,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, } +static void +softpipe_render_condition( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + + softpipe->render_cond_query = query; + softpipe->render_cond_mode = mode; +} + + + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -191,6 +204,7 @@ softpipe_create( struct pipe_screen *screen ) #endif softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE ); softpipe->pipe.winsys = screen->winsys; softpipe->pipe.screen = screen; @@ -222,6 +236,10 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + softpipe->pipe.create_gs_state = softpipe_create_gs_state; + softpipe->pipe.bind_gs_state = softpipe_bind_gs_state; + softpipe->pipe.delete_gs_state = softpipe_delete_gs_state; + softpipe->pipe.set_blend_color = softpipe_set_blend_color; softpipe->pipe.set_clip_state = softpipe_set_clip_state; softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; @@ -249,6 +267,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe_init_query_funcs( softpipe ); + softpipe->pipe.render_condition = softpipe_render_condition; + /* * Alloc caches for accessing drawing surfaces and textures. * Must be before quad stage setup! 
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 8ce20c5744c..73fa744f9d4 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -58,6 +58,7 @@ struct softpipe_context { struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; struct sp_vertex_shader *vs; + struct sp_geometry_shader *gs; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -115,6 +116,10 @@ struct softpipe_context { unsigned line_stipple_counter; + /** Conditional query object and mode */ + struct pipe_query *render_cond_query; + uint render_cond_mode; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; @@ -147,6 +152,7 @@ struct softpipe_context { unsigned use_sse : 1; unsigned dump_fs : 1; + unsigned dump_gs : 1; unsigned no_rast : 1; }; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 14cb1322e1d..87312ae1510 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,6 +38,7 @@ #include "util/u_prim.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_state.h" #include "draw/draw_context.h" @@ -48,7 +49,7 @@ static void softpipe_map_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; - uint i, size; + uint i, vssize, gssize; for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) @@ -57,13 +58,21 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) } if (sp->constants[PIPE_SHADER_VERTEX].buffer) - size = sp->constants[PIPE_SHADER_VERTEX].buffer->size; + vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size; else - size = 0; + vssize = 0; - draw_set_mapped_constant_buffer(sp->draw, + if (sp->constants[PIPE_SHADER_GEOMETRY].buffer) + gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size; + else + gssize = 
0; + + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], - size); + vssize); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, + sp->mapped_constants[PIPE_SHADER_GEOMETRY], + gssize); } @@ -78,9 +87,10 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) */ draw_flush(sp->draw); - draw_set_mapped_constant_buffer(sp->draw, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0); - for (i = 0; i < 2; i++) { + for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) ws->buffer_unmap(ws, sp->constants[i].buffer); sp->mapped_constants[i] = NULL; @@ -220,6 +230,9 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, struct draw_context *draw = sp->draw; unsigned i; + if (!softpipe_check_render_cond(sp)) + return TRUE; + sp->reduced_api_prim = u_reduced_prim(mode); if (sp->dirty) { diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 5fbac06a535..7f573aef3c3 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -128,6 +128,7 @@ sp_vbuf_unmap_vertices(struct vbuf_render *vbr, { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + (void) cvbr; /* do nothing */ } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index fe6b6cec353..d9babe81dad 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -229,7 +229,7 @@ blend_quad(struct quad_stage *qs, static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; struct softpipe_context *softpipe = qs->softpipe; - float source[4][QUAD_SIZE]; + float 
source[4][QUAD_SIZE] = { { 0 } }; /* * Compute src/first term RGB diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 379cf4ad064..4ef5d9f7b1d 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe, } +/** + * Called by rendering function to check rendering is conditional. + * \return TRUE if we should render, FALSE if we should skip rendering + */ +boolean +softpipe_check_render_cond(struct softpipe_context *sp) +{ + struct pipe_context *pipe = &sp->pipe; + boolean b, wait; + uint64_t result; + + if (!sp->render_cond_query) { + return TRUE; /* no query predicate, draw normally */ + } + + wait = (sp->render_cond_mode == PIPE_RENDER_COND_WAIT || + sp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT); + + b = pipe->get_query_result(pipe, sp->render_cond_query, wait, &result); + if (b) + return result > 0; + else + return TRUE; +} + + void softpipe_init_query_funcs(struct softpipe_context *softpipe ) { softpipe->pipe.create_query = softpipe_create_query; diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h index 05060a45759..736c033897e 100644 --- a/src/gallium/drivers/softpipe/sp_query.h +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -32,6 +32,10 @@ #ifndef SP_QUERY_H #define SP_QUERY_H +extern boolean +softpipe_check_render_cond(struct softpipe_context *sp); + + struct softpipe_context; extern void softpipe_init_query_funcs(struct softpipe_context * ); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 615581b95f9..3da75364c5d 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1268,7 +1268,7 @@ void sp_setup_prepare( struct setup_context *setup ) } /* Note: nr_attrs is only used for debugging (vertex printing) */ - setup->nr_vertex_attrs = 
draw_num_vs_outputs(sp->draw); + setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw); sp->quad.first->begin( sp->quad.first ); diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 00da41b9857..f8886565e92 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -50,6 +50,7 @@ #define SP_NEW_VERTEX 0x1000 #define SP_NEW_VS 0x2000 #define SP_NEW_QUERY 0x4000 +#define SP_NEW_GS 0x8000 struct tgsi_sampler; @@ -90,6 +91,11 @@ struct sp_vertex_shader { int max_sampler; /* -1 if no samplers */ }; +/** Subclass of pipe_shader_state */ +struct sp_geometry_shader { + struct pipe_shader_state shader; + struct draw_geometry_shader *draw_data; +}; void * @@ -143,6 +149,10 @@ void *softpipe_create_vs_state(struct pipe_context *, const struct pipe_shader_state *); void softpipe_bind_vs_state(struct pipe_context *, void *); void softpipe_delete_vs_state(struct pipe_context *, void *); +void *softpipe_create_gs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_gs_state(struct pipe_context *, void *); +void softpipe_delete_gs_state(struct pipe_context *, void *); void softpipe_set_polygon_stipple( struct pipe_context *, const struct pipe_poly_stipple * ); diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index efed082f823..95ab3234337 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -29,6 +29,7 @@ */ #include "util/u_memory.h" +#include "draw/draw_context.h" #include "sp_context.h" #include "sp_state.h" @@ -45,6 +46,8 @@ void softpipe_bind_blend_state( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend = (struct pipe_blend_state *)blend; softpipe->dirty |= SP_NEW_BLEND; @@ -62,6 +65,8 @@ void softpipe_set_blend_color( struct pipe_context *pipe, 
{ struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend_color = *blend_color; softpipe->dirty |= SP_NEW_BLEND; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index c24a737d07b..f6856a5f691 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -67,7 +67,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(softpipe->draw); + const uint num = draw_current_shader_outputs(softpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -117,13 +117,13 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(softpipe->draw, - spfs->info.input_semantic_name[i], - spfs->info.input_semantic_index[i]); + src = draw_find_shader_output(softpipe->draw, + spfs->info.input_semantic_name[i], + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + softpipe->psize_slot = draw_find_shader_output(softpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b41f7e8ab72..aa12bb215a8 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -69,7 +69,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->fs = (struct sp_fragment_shader *) fs; + draw_flush(softpipe->draw); + + if (softpipe->fs == fs) + 
return; + + draw_flush(softpipe->draw); + + softpipe->fs = fs; softpipe->dirty |= SP_NEW_FS; } @@ -159,9 +166,75 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + draw_flush(softpipe->draw); + /* note: reference counting */ pipe_buffer_reference(&softpipe->constants[shader].buffer, buf ? buf->buffer : NULL); softpipe->dirty |= SP_NEW_CONSTANTS; } + +void * +softpipe_create_gs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_geometry_shader *state; + + state = CALLOC_STRUCT(sp_geometry_shader); + if (state == NULL ) + goto fail; + + /* debug */ + if (softpipe->dump_gs) + tgsi_dump(templ->tokens, 0); + + /* copy shader tokens, the ones passed in will go away. + */ + state->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (state->shader.tokens == NULL) + goto fail; + + state->draw_data = draw_create_geometry_shader(softpipe->draw, templ); + if (state->draw_data == NULL) + goto fail; + + return state; + +fail: + if (state) { + FREE( (void *)state->shader.tokens ); + FREE( state->draw_data ); + FREE( state ); + } + return NULL; +} + + +void +softpipe_bind_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->gs = (struct sp_geometry_shader *)gs; + + draw_bind_geometry_shader(softpipe->draw, + (softpipe->gs ? softpipe->gs->draw_data : NULL)); + + softpipe->dirty |= SP_NEW_GS; +} + + +void +softpipe_delete_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + struct sp_geometry_shader *state = + (struct sp_geometry_shader *)gs; + + draw_delete_geometry_shader(softpipe->draw, + (state) ? 
state->draw_data : 0); + FREE(state); +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 87b72196838..a5b00336d44 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -41,14 +41,17 @@ softpipe_create_rasterizer_state(struct pipe_context *pipe, } void softpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct softpipe_context *softpipe = softpipe_context(pipe); + if (softpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(softpipe->draw, setup); + draw_set_rasterizer_state(softpipe->draw, rasterizer); - softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + softpipe->rasterizer = rasterizer; softpipe->dirty |= SP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index a518248bb18..f6154109ea8 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -51,6 +51,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, struct softpipe_context *sp = softpipe_context(pipe); uint i; + draw_flush(sp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index c3de12b4a39..af99c9de37c 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -29,6 +29,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_screen.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "util/u_upload_mgr.h" #include "svga_context.h" @@ -61,6 +62,9 @@ static void svga_destroy( struct pipe_context *pipe ) u_upload_destroy( svga->upload_vb ); u_upload_destroy( svga->upload_ib ); + 
util_bitmask_destroy( svga->vs_bm ); + util_bitmask_destroy( svga->fs_bm ); + for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader) pipe_buffer_reference( &svga->curr.cb[shader], NULL ); @@ -130,7 +134,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga = CALLOC_STRUCT(svga_context); if (svga == NULL) - goto error1; + goto no_svga; svga->pipe.winsys = screen->winsys; svga->pipe.screen = screen; @@ -142,7 +146,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->swc = svgascreen->sws->context_create(svgascreen->sws); if(!svga->swc) - goto error2; + goto no_swc; svga_init_blend_functions(svga); svga_init_blit_functions(svga); @@ -165,32 +169,40 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); if (!svga_init_swtnl(svga)) - goto error3; + goto no_swtnl; + + svga->fs_bm = util_bitmask_create(); + if (svga->fs_bm == NULL) + goto no_fs_bm; + + svga->vs_bm = util_bitmask_create(); + if (svga->vs_bm == NULL) + goto no_vs_bm; svga->upload_ib = u_upload_create( svga->pipe.screen, 32 * 1024, 16, PIPE_BUFFER_USAGE_INDEX ); if (svga->upload_ib == NULL) - goto error4; + goto no_upload_ib; svga->upload_vb = u_upload_create( svga->pipe.screen, 128 * 1024, 16, PIPE_BUFFER_USAGE_VERTEX ); if (svga->upload_vb == NULL) - goto error5; + goto no_upload_vb; svga->hwtnl = svga_hwtnl_create( svga, svga->upload_ib, svga->swc ); if (svga->hwtnl == NULL) - goto error6; + goto no_hwtnl; ret = svga_emit_initial_state( svga ); if (ret) - goto error7; + goto no_state; /* Avoid shortcircuiting state with initial value of zero. 
*/ @@ -209,19 +221,23 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) return &svga->pipe; -error7: +no_state: svga_hwtnl_destroy( svga->hwtnl ); -error6: +no_hwtnl: u_upload_destroy( svga->upload_vb ); -error5: +no_upload_vb: u_upload_destroy( svga->upload_ib ); -error4: +no_upload_ib: + util_bitmask_destroy( svga->vs_bm ); +no_vs_bm: + util_bitmask_destroy( svga->fs_bm ); +no_fs_bm: svga_destroy_swtnl(svga); -error3: +no_swtnl: svga->swc->destroy(svga->swc); -error2: +no_swc: FREE(svga); -error1: +no_svga: return NULL; } diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 0885d9ca741..fa7f6cb3bb9 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -41,6 +41,7 @@ struct draw_vertex_shader; struct svga_shader_result; struct SVGACmdMemory; +struct util_bitmask; struct u_upload_mgr; @@ -319,12 +320,14 @@ struct svga_context boolean new_vdecl; } swtnl; + /* Bitmask of used shader IDs */ + struct util_bitmask *fs_bm; + struct util_bitmask *vs_bm; + struct { unsigned dirty[4]; unsigned texture_timestamp; - unsigned next_fs_id; - unsigned next_vs_id; /* Internally generated shaders: */ diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 8db40d0fd57..ca73cf9d5a3 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -164,7 +164,8 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", - svga_surface(svga->curr.framebuffer.cbufs[0])->handle, + svga->curr.framebuffer.cbufs[0] ? 
+ svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, hwtnl->cmd.prim_count); ret = SVGA3D_BeginDrawPrimitives(swc, diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index e3be840d920..a461a86dd31 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -107,6 +108,8 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->fs_bm, result->id ); + svga_destroy_shader_result( result ); } diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index c104c41f5f8..e82d10c2595 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -172,6 +173,8 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->vs_bm, result->id ); + svga_destroy_shader_result( result ); } diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 6ec38ed3e45..1902b0106ba 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "svga_context.h" #include "svga_state.h" @@ -74,9 +75,12 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->fs_bm); + if(result->id == UTIL_BITMASK_INVALID_INDEX) + goto fail; ret = 
SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + result->id, SVGA3D_SHADERTYPE_PS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -84,14 +88,16 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_fs_id++; result->next = fs->base.results; fs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->fs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -116,7 +122,7 @@ fail: */ static int emit_white_fs( struct svga_context *svga ) { - int ret; + int ret = PIPE_ERROR; /* ps_3_0 * def c0, 1.000000, 0.000000, 0.000000, 1.000000 @@ -137,16 +143,26 @@ static int emit_white_fs( struct svga_context *svga ) 0x0000ffff, }; + assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX); + svga->state.white_fs_id = util_bitmask_add(svga->fs_bm); + if(svga->state.white_fs_id == SVGA3D_INVALID_ID) + goto no_fs_id; + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + svga->state.white_fs_id, SVGA3D_SHADERTYPE_PS, white_tokens, sizeof(white_tokens)); if (ret) - return ret; + goto no_definition; - svga->state.white_fs_id = svga->state.next_fs_id++; return 0; + +no_definition: + util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id); + svga->state.white_fs_id = SVGA3D_INVALID_ID; +no_fs_id: + return ret; } @@ -251,12 +267,14 @@ static int emit_hw_fs( struct svga_context *svga, assert(id != SVGA3D_INVALID_ID); - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_PS, - id ); - if (ret) - return ret; + if (result != svga->state.hw_draw.fs) { + if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_PS, + id ); + if (ret) + return ret; + } svga->dirty |= SVGA_NEW_FS_RESULT; 
svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id; diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 82e7874e2a8..c614281858d 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_defines.h" #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "translate/translate.h" #include "svga_context.h" @@ -78,8 +79,12 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->vs_bm); + if(result->id == UTIL_BITMASK_INVALID_INDEX) + goto fail; + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_vs_id, + result->id, SVGA3D_SHADERTYPE_VS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -87,14 +92,16 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_vs_id++; result->next = vs->base.results; vs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->vs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -142,12 +149,14 @@ static int emit_hw_vs( struct svga_context *svga, id = result->id; } - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_VS, - id ); - if (ret) - return ret; + if (result != svga->state.hw_draw.vs) { + if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_VS, + id ); + if (ret) + return ret; + } svga->dirty |= SVGA_NEW_VS_RESULT; svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id; diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 8b14c913f72..7655121bec1 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ 
b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -90,7 +90,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga, PIPE_BUFFER_USAGE_CPU_READ); assert(map); draw_set_mapped_constant_buffer( - draw, + draw, PIPE_SHADER_VERTEX, map, svga->curr.cb[PIPE_SHADER_VERTEX]->size); } diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index 25b8c2af3a0..94b6ccc62dd 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -156,7 +156,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) memset(vdecl, 0, sizeof(vdecl)); /* always add position */ - src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->attrib[0].emit = EMIT_4F; vdecl[0].array.offset = offset; @@ -169,7 +169,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) for (i = 0; i < fs->base.info.num_inputs; i++) { unsigned name = fs->base.info.input_semantic_name[i]; unsigned index = fs->base.info.input_semantic_index[i]; - src = draw_find_vs_output(draw, name, index); + src = draw_find_shader_output(draw, name, index); vdecl[nr_decls].array.offset = offset; vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i]; diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index b8ef137c015..0cd620189b7 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -31,6 +31,7 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "svgadump/svga_shader_dump.h" @@ -221,6 +222,7 @@ svga_tgsi_translate( const struct svga_shader *shader, result->tokens = (const unsigned *)emit.buf; result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned); memcpy(&result->key, &key, sizeof key); + result->id = 
UTIL_BITMASK_INVALID_INDEX; if (SVGA_DEBUG & DEBUG_TGSI) { diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 1670da8bfa9..dc5eb8fc606 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -2109,7 +2109,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_I2F: case TGSI_OPCODE_NOT: case TGSI_OPCODE_SHL: - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: case TGSI_OPCODE_XOR: return FALSE; diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index e6d4a74e868..d59fb89a58c 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -1444,6 +1444,312 @@ dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd) void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; + + switch(cmd_id) { + case SVGA_3D_CMD_SURFACE_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); + { + const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; + dump_SVGA3dCmdDefineSurface(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dSize) <= next) { + dump_SVGA3dSize((const SVGA3dSize *)body); + body += sizeof(SVGA3dSize); + } + } + break; + case SVGA_3D_CMD_SURFACE_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); + { + const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; + dump_SVGA3dCmdDestroySurface(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_COPY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); + { + const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; + dump_SVGA3dCmdSurfaceCopy(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const 
SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + } + break; + case SVGA_3D_CMD_SURFACE_STRETCHBLT: + _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); + { + const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; + dump_SVGA3dCmdSurfaceStretchBlt(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_DMA: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); + { + const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; + dump_SVGA3dCmdSurfaceDMA(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { + dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); + body += sizeof(SVGA3dCmdSurfaceDMASuffix); + } + } + break; + case SVGA_3D_CMD_CONTEXT_DEFINE: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); + { + const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; + dump_SVGA3dCmdDefineContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CONTEXT_DESTROY: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); + { + const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; + dump_SVGA3dCmdDestroyContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTRANSFORM: + _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); + { + const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; + dump_SVGA3dCmdSetTransform(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETZRANGE: + _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); + { + const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; + dump_SVGA3dCmdSetZRange(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETRENDERSTATE: + _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); + { + const SVGA3dCmdSetRenderState 
*cmd = (const SVGA3dCmdSetRenderState *)body; + dump_SVGA3dCmdSetRenderState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRenderState) <= next) { + dump_SVGA3dRenderState((const SVGA3dRenderState *)body); + body += sizeof(SVGA3dRenderState); + } + } + break; + case SVGA_3D_CMD_SETRENDERTARGET: + _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); + { + const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; + dump_SVGA3dCmdSetRenderTarget(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTEXTURESTATE: + _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); + { + const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; + dump_SVGA3dCmdSetTextureState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dTextureState) <= next) { + dump_SVGA3dTextureState((const SVGA3dTextureState *)body); + body += sizeof(SVGA3dTextureState); + } + } + break; + case SVGA_3D_CMD_SETMATERIAL: + _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); + { + const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; + dump_SVGA3dCmdSetMaterial(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTDATA: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); + { + const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; + dump_SVGA3dCmdSetLightData(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTENABLED: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); + { + const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; + dump_SVGA3dCmdSetLightEnabled(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETVIEWPORT: + _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); + { + const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; + dump_SVGA3dCmdSetViewport(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETCLIPPLANE: + 
_debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); + { + const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; + dump_SVGA3dCmdSetClipPlane(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CLEAR: + _debug_printf("\tSVGA_3D_CMD_CLEAR\n"); + { + const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; + dump_SVGA3dCmdClear(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRect) <= next) { + dump_SVGA3dRect((const SVGA3dRect *)body); + body += sizeof(SVGA3dRect); + } + } + break; + case SVGA_3D_CMD_PRESENT: + _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); + { + const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; + dump_SVGA3dCmdPresent(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyRect) <= next) { + dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); + body += sizeof(SVGA3dCopyRect); + } + } + break; + case SVGA_3D_CMD_SHADER_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); + { + const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; + dump_SVGA3dCmdDefineShader(cmd); + body = (const uint8_t *)&cmd[1]; + svga_shader_dump((const uint32_t *)body, + (unsigned)(next - body)/sizeof(uint32_t), + FALSE ); + body = next; + } + break; + case SVGA_3D_CMD_SHADER_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); + { + const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; + dump_SVGA3dCmdDestroyShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); + { + const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; + dump_SVGA3dCmdSetShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER_CONST: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); + { + const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; + dump_SVGA3dCmdSetShaderConst(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case 
SVGA_3D_CMD_DRAW_PRIMITIVES: + _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); + { + const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; + unsigned i, j; + dump_SVGA3dCmdDrawPrimitives(cmd); + body = (const uint8_t *)&cmd[1]; + for(i = 0; i < cmd->numVertexDecls; ++i) { + dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); + body += sizeof(SVGA3dVertexDecl); + } + for(j = 0; j < cmd->numRanges; ++j) { + dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); + body += sizeof(SVGA3dPrimitiveRange); + } + while(body + sizeof(SVGA3dVertexDivisor) <= next) { + dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); + body += sizeof(SVGA3dVertexDivisor); + } + } + break; + case SVGA_3D_CMD_SETSCISSORRECT: + _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); + { + const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; + dump_SVGA3dCmdSetScissorRect(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BEGIN_QUERY: + _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); + { + const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; + dump_SVGA3dCmdBeginQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_END_QUERY: + _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); + { + const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; + dump_SVGA3dCmdEndQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_WAIT_FOR_QUERY: + _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); + { + const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; + dump_SVGA3dCmdWaitForQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: + _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); + { + const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; + dump_SVGA3dCmdBlitSurfaceToScreen(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGASignedRect) <= next) { + 
dump_SVGASignedRect((const SVGASignedRect *)body); + body += sizeof(SVGASignedRect); + } + } + break; + default: + _debug_printf("\t0x%08x\n", cmd_id); + break; + } + + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); +} + + +void svga_dump_commands(const void *commands, uint32_t size) { const uint8_t *next = commands; @@ -1458,307 +1764,11 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; - switch(cmd_id) { - case SVGA_3D_CMD_SURFACE_DEFINE: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); - { - const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; - dump_SVGA3dCmdDefineSurface(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dSize) <= next) { - dump_SVGA3dSize((const SVGA3dSize *)body); - body += sizeof(SVGA3dSize); - } - } - break; - case SVGA_3D_CMD_SURFACE_DESTROY: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); - { - const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; - dump_SVGA3dCmdDestroySurface(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_COPY: - _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); - { - const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; - dump_SVGA3dCmdSurfaceCopy(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - } - break; - case SVGA_3D_CMD_SURFACE_STRETCHBLT: - _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); - { - const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; - 
dump_SVGA3dCmdSurfaceStretchBlt(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_DMA: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); - { - const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; - dump_SVGA3dCmdSurfaceDMA(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { - dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); - body += sizeof(SVGA3dCmdSurfaceDMASuffix); - } - } - break; - case SVGA_3D_CMD_CONTEXT_DEFINE: - _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); - { - const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; - dump_SVGA3dCmdDefineContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CONTEXT_DESTROY: - _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); - { - const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; - dump_SVGA3dCmdDestroyContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTRANSFORM: - _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); - { - const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; - dump_SVGA3dCmdSetTransform(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETZRANGE: - _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); - { - const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; - dump_SVGA3dCmdSetZRange(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETRENDERSTATE: - _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); - { - const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; - dump_SVGA3dCmdSetRenderState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRenderState) <= next) { - dump_SVGA3dRenderState((const SVGA3dRenderState *)body); - body += 
sizeof(SVGA3dRenderState); - } - } - break; - case SVGA_3D_CMD_SETRENDERTARGET: - _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); - { - const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; - dump_SVGA3dCmdSetRenderTarget(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTEXTURESTATE: - _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); - { - const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; - dump_SVGA3dCmdSetTextureState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dTextureState) <= next) { - dump_SVGA3dTextureState((const SVGA3dTextureState *)body); - body += sizeof(SVGA3dTextureState); - } - } - break; - case SVGA_3D_CMD_SETMATERIAL: - _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); - { - const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; - dump_SVGA3dCmdSetMaterial(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTDATA: - _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); - { - const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; - dump_SVGA3dCmdSetLightData(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTENABLED: - _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); - { - const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; - dump_SVGA3dCmdSetLightEnabled(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETVIEWPORT: - _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); - { - const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; - dump_SVGA3dCmdSetViewport(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETCLIPPLANE: - _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); - { - const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; - dump_SVGA3dCmdSetClipPlane(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CLEAR: - 
_debug_printf("\tSVGA_3D_CMD_CLEAR\n"); - { - const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; - dump_SVGA3dCmdClear(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRect) <= next) { - dump_SVGA3dRect((const SVGA3dRect *)body); - body += sizeof(SVGA3dRect); - } - } - break; - case SVGA_3D_CMD_PRESENT: - _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); - { - const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; - dump_SVGA3dCmdPresent(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyRect) <= next) { - dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); - body += sizeof(SVGA3dCopyRect); - } - } - break; - case SVGA_3D_CMD_SHADER_DEFINE: - _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); - { - const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; - dump_SVGA3dCmdDefineShader(cmd); - body = (const uint8_t *)&cmd[1]; - svga_shader_dump((const uint32_t *)body, - (unsigned)(next - body)/sizeof(uint32_t), - FALSE ); - body = next; - } - break; - case SVGA_3D_CMD_SHADER_DESTROY: - _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); - { - const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; - dump_SVGA3dCmdDestroyShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER: - _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); - { - const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; - dump_SVGA3dCmdSetShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER_CONST: - _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); - { - const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; - dump_SVGA3dCmdSetShaderConst(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_DRAW_PRIMITIVES: - _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); - { - const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; - unsigned i, j; - dump_SVGA3dCmdDrawPrimitives(cmd); - body = (const 
uint8_t *)&cmd[1]; - for(i = 0; i < cmd->numVertexDecls; ++i) { - dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); - body += sizeof(SVGA3dVertexDecl); - } - for(j = 0; j < cmd->numRanges; ++j) { - dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); - body += sizeof(SVGA3dPrimitiveRange); - } - while(body + sizeof(SVGA3dVertexDivisor) <= next) { - dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); - body += sizeof(SVGA3dVertexDivisor); - } - } - break; - case SVGA_3D_CMD_SETSCISSORRECT: - _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); - { - const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; - dump_SVGA3dCmdSetScissorRect(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_BEGIN_QUERY: - _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); - { - const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; - dump_SVGA3dCmdBeginQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_END_QUERY: - _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); - { - const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; - dump_SVGA3dCmdEndQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_WAIT_FOR_QUERY: - _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); - { - const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; - dump_SVGA3dCmdWaitForQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: - _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); - { - const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; - dump_SVGA3dCmdBlitSurfaceToScreen(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGASignedRect) <= next) { - dump_SVGASignedRect((const SVGASignedRect *)body); - body += sizeof(SVGASignedRect); - } - } - break; - default: - _debug_printf("\t0x%08x\n", cmd_id); - break; - } - - while(body + sizeof(uint32_t) <= next) { - 
_debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - _debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { _debug_printf("\tSVGA_CMD_FENCE\n"); diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h index 69a87020875..ca0154361cc 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.h +++ b/src/gallium/drivers/svga/svgadump/svga_dump.h @@ -28,6 +28,9 @@ #include "pipe/p_compiler.h" +void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size); + void svga_dump_commands(const void *commands, uint32_t size); diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py index a1ada29ef84..0bc0b3ae317 100755 --- a/src/gallium/drivers/svga/svgadump/svga_dump.py +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -208,6 +208,56 @@ cmds = [ def dump_cmds(): print r''' void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; +''' + print ' switch(cmd_id) {' + indexes = 'ijklmn' + for id, header, body, footer in cmds: + print ' case %s:' % id + print ' _debug_printf("\\t%s\\n");' % id + print ' {' + print ' const %s *cmd = (const %s *)body;' % (header, header) + if len(body): + print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' + print ' dump_%s(cmd);' % header + print ' body = (const uint8_t *)&cmd[1];' + for i in range(len(body)): + struct, count = body[i] + idx = indexes[i] + print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) + print ' dump_%s((const %s *)body);' % (struct, struct) + print ' body += sizeof(%s);' % struct + print ' }' + if footer is not None: + print ' while(body + sizeof(%s) <= next) {' % footer + print ' dump_%s((const %s *)body);' % (footer, footer) + print ' 
body += sizeof(%s);' % footer + print ' }' + if id == 'SVGA_3D_CMD_SHADER_DEFINE': + print ' svga_shader_dump((const uint32_t *)body,' + print ' (unsigned)(next - body)/sizeof(uint32_t),' + print ' FALSE);' + print ' body = next;' + print ' }' + print ' break;' + print ' default:' + print ' _debug_printf("\\t0x%08x\\n", cmd_id);' + print ' break;' + print ' }' + print r''' + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); +} +''' + print r''' +void svga_dump_commands(const void *commands, uint32_t size) { const uint8_t *next = commands; @@ -222,51 +272,11 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; -''' - print ' switch(cmd_id) {' - indexes = 'ijklmn' - for id, header, body, footer in cmds: - print ' case %s:' % id - print ' _debug_printf("\\t%s\\n");' % id - print ' {' - print ' const %s *cmd = (const %s *)body;' % (header, header) - if len(body): - print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' - print ' dump_%s(cmd);' % header - print ' body = (const uint8_t *)&cmd[1];' - for i in range(len(body)): - struct, count = body[i] - idx = indexes[i] - print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) - print ' dump_%s((const %s *)body);' % (struct, struct) - print ' body += sizeof(%s);' % struct - print ' }' - if footer is not None: - print ' while(body + sizeof(%s) <= next) {' % footer - print ' dump_%s((const %s *)body);' % (footer, footer) - print ' body += sizeof(%s);' % footer - print ' }' - if id == 'SVGA_3D_CMD_SHADER_DEFINE': - print ' sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));' - print ' body = next;' - 
print ' }' - print ' break;' - print ' default:' - print ' _debug_printf("\\t0x%08x\\n", cmd_id);' - print ' break;' - print ' }' - - print r''' - while(body + sizeof(uint32_t) <= next) { - _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - _debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { _debug_printf("\tSVGA_CMD_FENCE\n"); diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 1000c31e49a..203c3851bc3 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -24,11 +24,10 @@ ensure the right libGL.so is being picked by doing ldd progs/trivial/tri -== Traceing == +== Tracing == -For traceing then do +For tracing then do - export XMESA_TRACE=y GALLIUM_TRACE=tri.trace progs/trivial/tri which should create a tri.trace file, which is an XML file. You can view copying diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 80f4874b780..ad47a56fba4 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -52,6 +52,7 @@ trace_buffer_unwrap(struct trace_context *tr_ctx, assert(tr_buf->buffer); assert(tr_buf->buffer->screen == tr_scr->screen); + (void) tr_scr; return tr_buf->buffer; } @@ -90,6 +91,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx, assert(tr_surf->surface); assert(tr_surf->surface->texture->screen == tr_scr->screen); + (void) tr_scr; return tr_surf->surface; } diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 7e2ccbcfdc5..0f45e211a32 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -40,7 +40,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || 
defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) #include <stdlib.h> #endif @@ -258,7 +258,7 @@ boolean trace_dump_trace_begin() trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); trace_dump_writes("<trace version='0.1'>\n"); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) /* Linux applications rarely cleanup GL / Gallium resources so catch * application exit here */ atexit(trace_dump_trace_close); diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index c31b1d86986..0546aad9b50 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -45,7 +45,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define sleep Sleep -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_APPLE) void usleep(int); # define sleep usleep #else @@ -180,7 +180,7 @@ static int trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, uint32_t serial) { struct trace_screen *tr_scr = tr_rbug->tr_scr; - struct trace_texture *tr_tex; + struct trace_texture *tr_tex = NULL; struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info *)header; struct tr_list *ptr; struct pipe_texture *t; @@ -223,7 +223,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, struct rbug_proto_texture_read *gptr = (struct rbug_proto_texture_read *)header; struct trace_screen *tr_scr = tr_rbug->tr_scr; - struct trace_texture *tr_tex; + struct trace_texture *tr_tex = NULL; struct tr_list *ptr; struct pipe_screen *screen = tr_scr->screen; diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index ac20a47af1e..117503aaff6 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -426,7 +426,7 @@ 
trace_screen_transfer_unmap(struct pipe_screen *_screen, struct pipe_transfer *transfer = tr_trans->transfer; if(tr_trans->map) { - size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride; + size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride; trace_dump_call_begin("pipe_screen", "transfer_write"); diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h index 1c16042ee5a..e2f981d0513 100644 --- a/src/gallium/drivers/trace/tr_state.h +++ b/src/gallium/drivers/trace/tr_state.h @@ -32,7 +32,7 @@ struct tgsi_token; enum trace_shader_type { TRACE_SHADER_FRAGMENT = 0, TRACE_SHADER_VERTEX = 1, - TRACE_SHADER_GEOMETRY = 2, + TRACE_SHADER_GEOMETRY = 2 }; struct trace_shader diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index d5d1e0e76bc..6394e095d35 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -100,6 +100,14 @@ struct pipe_context { unsigned count); /*@}*/ + /** + * Predicate subsequent rendering on occlusion query result + * \param query the query predicate, or NULL if no predicate + * \param mode one of PIPE_COND_RENDER_x + */ + void (*render_condition)( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ); /** * Query objects @@ -158,6 +166,12 @@ struct pipe_context { const struct pipe_shader_state *); void (*bind_vs_state)(struct pipe_context *, void *); void (*delete_vs_state)(struct pipe_context *, void *); + + void * (*create_gs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*bind_gs_state)(struct pipe_context *, void *); + void (*delete_gs_state)(struct pipe_context *, void *); + /*@}*/ /** diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index fe1390d765f..c3b1e634ffc 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ 
-321,23 +321,28 @@ enum pipe_transfer_usage { */ #define PIPE_SHADER_VERTEX 0 #define PIPE_SHADER_FRAGMENT 1 -#define PIPE_SHADER_TYPES 2 +#define PIPE_SHADER_GEOMETRY 2 +#define PIPE_SHADER_TYPES 3 /** * Primitive types: */ -#define PIPE_PRIM_POINTS 0 -#define PIPE_PRIM_LINES 1 -#define PIPE_PRIM_LINE_LOOP 2 -#define PIPE_PRIM_LINE_STRIP 3 -#define PIPE_PRIM_TRIANGLES 4 -#define PIPE_PRIM_TRIANGLE_STRIP 5 -#define PIPE_PRIM_TRIANGLE_FAN 6 -#define PIPE_PRIM_QUADS 7 -#define PIPE_PRIM_QUAD_STRIP 8 -#define PIPE_PRIM_POLYGON 9 -#define PIPE_PRIM_MAX 10 +#define PIPE_PRIM_POINTS 0 +#define PIPE_PRIM_LINES 1 +#define PIPE_PRIM_LINE_LOOP 2 +#define PIPE_PRIM_LINE_STRIP 3 +#define PIPE_PRIM_TRIANGLES 4 +#define PIPE_PRIM_TRIANGLE_STRIP 5 +#define PIPE_PRIM_TRIANGLE_FAN 6 +#define PIPE_PRIM_QUADS 7 +#define PIPE_PRIM_QUAD_STRIP 8 +#define PIPE_PRIM_POLYGON 9 +#define PIPE_PRIM_LINES_ADJACENCY 10 +#define PIPE_PRIM_LINE_STRIP_ADJACENCY 11 +#define PIPE_PRIM_TRIANGLES_ADJACENCY 12 +#define PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY 13 +#define PIPE_PRIM_MAX 14 /** @@ -350,6 +355,15 @@ enum pipe_transfer_usage { /** + * Conditional rendering modes + */ +#define PIPE_RENDER_COND_WAIT 0 +#define PIPE_RENDER_COND_NO_WAIT 1 +#define PIPE_RENDER_COND_BY_REGION_WAIT 2 +#define PIPE_RENDER_COND_BY_REGION_NO_WAIT 3 + + +/** * Point sprite coord modes */ #define PIPE_SPRITE_COORD_NONE 0 diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index d79124828f9..b489b044667 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -121,17 +121,18 @@ struct tgsi_declaration_range unsigned Last : 16; /**< UINT */ }; -#define TGSI_SEMANTIC_POSITION 0 -#define TGSI_SEMANTIC_COLOR 1 -#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ -#define TGSI_SEMANTIC_FOG 3 -#define TGSI_SEMANTIC_PSIZE 4 -#define TGSI_SEMANTIC_GENERIC 5 -#define TGSI_SEMANTIC_NORMAL 6 -#define TGSI_SEMANTIC_FACE 7 -#define 
TGSI_SEMANTIC_EDGEFLAG 8 -#define TGSI_SEMANTIC_INSTANCEID 9 -#define TGSI_SEMANTIC_COUNT 10 /**< number of semantic values */ +#define TGSI_SEMANTIC_POSITION 0 +#define TGSI_SEMANTIC_COLOR 1 +#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ +#define TGSI_SEMANTIC_FOG 3 +#define TGSI_SEMANTIC_PSIZE 4 +#define TGSI_SEMANTIC_GENERIC 5 +#define TGSI_SEMANTIC_NORMAL 6 +#define TGSI_SEMANTIC_FACE 7 +#define TGSI_SEMANTIC_EDGEFLAG 8 +#define TGSI_SEMANTIC_PRIMID 9 +#define TGSI_SEMANTIC_INSTANCEID 10 +#define TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */ struct tgsi_declaration_semantic { @@ -141,6 +142,8 @@ struct tgsi_declaration_semantic }; #define TGSI_IMM_FLOAT32 0 +#define TGSI_IMM_UINT32 1 +#define TGSI_IMM_INT32 2 struct tgsi_immediate { @@ -153,6 +156,8 @@ struct tgsi_immediate union tgsi_immediate_data { float Float; + unsigned Uint; + int Int; }; #define TGSI_PROPERTY_GS_INPUT_PRIM 0 @@ -264,7 +269,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_NOT 85 #define TGSI_OPCODE_TRUNC 86 #define TGSI_OPCODE_SHL 87 -#define TGSI_OPCODE_SHR 88 + /* gap */ #define TGSI_OPCODE_AND 89 #define TGSI_OPCODE_OR 90 #define TGSI_OPCODE_MOD 91 @@ -289,7 +294,33 @@ struct tgsi_property_data { #define TGSI_OPCODE_KIL 116 /* conditional kill */ #define TGSI_OPCODE_END 117 /* aka HALT */ /* gap */ -#define TGSI_OPCODE_LAST 119 +#define TGSI_OPCODE_F2I 119 +#define TGSI_OPCODE_IDIV 120 +#define TGSI_OPCODE_IMAX 121 +#define TGSI_OPCODE_IMIN 122 +#define TGSI_OPCODE_INEG 123 +#define TGSI_OPCODE_ISGE 124 +#define TGSI_OPCODE_ISHR 125 +#define TGSI_OPCODE_ISLT 126 +#define TGSI_OPCODE_F2U 127 +#define TGSI_OPCODE_U2F 128 +#define TGSI_OPCODE_UADD 129 +#define TGSI_OPCODE_UDIV 130 +#define TGSI_OPCODE_UMAD 131 +#define TGSI_OPCODE_UMAX 132 +#define TGSI_OPCODE_UMIN 133 +#define TGSI_OPCODE_UMOD 134 +#define TGSI_OPCODE_UMUL 135 +#define TGSI_OPCODE_USEQ 136 +#define TGSI_OPCODE_USGE 137 +#define TGSI_OPCODE_USHR 138 +#define TGSI_OPCODE_USLT 139 +#define 
TGSI_OPCODE_USNE 140 +#define TGSI_OPCODE_SWITCH 141 +#define TGSI_OPCODE_CASE 142 +#define TGSI_OPCODE_DEFAULT 143 +#define TGSI_OPCODE_ENDSWITCH 144 +#define TGSI_OPCODE_LAST 145 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c index 0b60b5be059..d55aa51b82d 100644 --- a/src/gallium/state_trackers/egl/egl_surface.c +++ b/src/gallium/state_trackers/egl/egl_surface.c @@ -171,9 +171,9 @@ drm_takedown_shown_screen(_EGLDisplay *dpy, struct drm_screen *screen) drmModeSetCrtc( dev->drmFD, screen->crtcID, - 0, // FD + 0, /* FD */ 0, 0, - NULL, 0, // List of output ids + NULL, 0, /* List of output ids */ NULL); drmModeRmFB(dev->drmFD, screen->fbID); diff --git a/src/gallium/state_trackers/egl/egl_tracker.c b/src/gallium/state_trackers/egl/egl_tracker.c index 745803c7eb0..9345b0f4908 100644 --- a/src/gallium/state_trackers/egl/egl_tracker.c +++ b/src/gallium/state_trackers/egl/egl_tracker.c @@ -152,6 +152,7 @@ drm_initialize(_EGLDriver *drv, _EGLDisplay *disp, EGLint *major, EGLint *minor) int num_screens = 0; EGLint i; int fd; + _EGLConfig *config; dev = (struct drm_device *) calloc(1, sizeof(struct drm_device)); if (!dev) @@ -206,7 +207,7 @@ drm_initialize(_EGLDriver *drv, _EGLDisplay *disp, EGLint *major, EGLint *minor) disp->DriverData = dev; /* for now we only have one config */ - _EGLConfig *config = calloc(1, sizeof(*config)); + config = calloc(1, sizeof(*config)); memset(config, 1, sizeof(*config)); _eglInitConfig(config, 1); _eglSetConfigAttrib(config, EGL_RED_SIZE, 8); diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index f2881b9a31e..228ac9a20e9 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -644,6 +644,7 @@ register_with_display(Display *dpy) XExtCodes *c = XAddExtension(dpy); ext = 
dpy->ext_procs; /* new extension is at head of list */ assert(c->extension == ext->codes.extension); + (void) c; ext->name = _mesa_strdup(extName); ext->close_display = close_display_callback; } diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index c76dfb31d2b..1783bc504d9 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -67,6 +67,10 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" +#include "trace/tr_screen.h" +#include "trace/tr_context.h" +#include "trace/tr_texture.h" + #include "xm_winsys.h" #include <GL/glx.h> @@ -87,6 +91,8 @@ void xmesa_set_driver( const struct xm_driver *templ ) */ pipe_mutex _xmesa_lock; +static struct pipe_screen *_screen = NULL; +static struct pipe_screen *screen = NULL; /**********************************************************************/ @@ -754,7 +760,7 @@ PUBLIC XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) { static GLboolean firstTime = GL_TRUE; - static struct pipe_screen *screen = NULL; + struct pipe_context *_pipe = NULL; struct pipe_context *pipe = NULL; XMesaContext c; GLcontext *mesaCtx; @@ -762,7 +768,8 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (firstTime) { pipe_mutex_init(_xmesa_lock); - screen = driver.create_pipe_screen(); + _screen = driver.create_pipe_screen(); + screen = trace_screen_create( _screen ); firstTime = GL_FALSE; } @@ -781,9 +788,11 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (screen == NULL) goto fail; - pipe = driver.create_pipe_context(screen, (void *) c); - if (pipe == NULL) + _pipe = driver.create_pipe_context(_screen, (void *) c); + if (_pipe == NULL) goto fail; + pipe = trace_context_create(screen, _pipe); + pipe->priv = c; c->st = st_create_context(pipe, &v->mesa_visual, @@ -1110,6 +1119,12 @@ void XMesaSwapBuffers( XMesaBuffer b ) st_swapbuffers(b->stfb, 
&frontLeftSurf, NULL); if (frontLeftSurf) { + if (_screen != screen) { + struct trace_surface *tr_surf = trace_surface( frontLeftSurf ); + struct pipe_surface *surf = tr_surf->surface; + frontLeftSurf = surf; + } + driver.display_surface(b, frontLeftSurf); } diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript index ec385e7c447..d4fdd43688f 100644 --- a/src/gallium/state_trackers/python/SConscript +++ b/src/gallium/state_trackers/python/SConscript @@ -38,10 +38,12 @@ if 'python' in env['statetrackers']: ], ) + env['no_import_lib'] = 1 + env.SharedLibrary( target = '_gallium', source = [ 'st_hardpipe_winsys.c', ], - LIBS = [pyst, softpipe, trace] + auxiliaries + env['LIBS'], + LIBS = [pyst, softpipe, trace] + gallium + env['LIBS'], ) diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 8e323f4896d..96b13c2258e 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -46,6 +46,7 @@ #include "util/u_draw_quad.h" #include "util/u_tile.h" #include "util/u_math.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index 9728207d9c7..84ce1a41e6d 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -52,11 +52,16 @@ struct st_context { cso_set_blend($self->cso, state); } - void set_sampler( unsigned index, const struct pipe_sampler_state *state ) { + void set_fragment_sampler( unsigned index, const struct pipe_sampler_state *state ) { cso_single_sampler($self->cso, index, state); cso_single_sampler_done($self->cso); } + void set_vertex_sampler( unsigned index, const struct pipe_sampler_state *state ) { + cso_single_vertex_sampler($self->cso, index, state); + 
cso_single_vertex_sampler_done($self->cso); + } + void set_rasterizer( const struct pipe_rasterizer_state *state ) { cso_set_rasterizer($self->cso, state); } @@ -103,6 +108,25 @@ struct st_context { $self->vs = vs; } + void set_geometry_shader( const struct pipe_shader_state *state ) { + void *gs; + + if(!state) { + cso_set_geometry_shader_handle($self->cso, NULL); + return; + } + + gs = $self->pipe->create_gs_state($self->pipe, state); + if(!gs) + return; + + if(cso_set_geometry_shader_handle($self->cso, gs) != PIPE_OK) + return; + + cso_delete_geometry_shader($self->cso, $self->gs); + $self->gs = gs; + } + /* * Parameter-like state (or properties) */ @@ -142,14 +166,24 @@ struct st_context { cso_set_viewport($self->cso, state); } - void set_sampler_texture(unsigned index, - struct pipe_texture *texture) { + void set_fragment_sampler_texture(unsigned index, + struct pipe_texture *texture) { if(!texture) texture = $self->default_texture; - pipe_texture_reference(&$self->sampler_textures[index], texture); - $self->pipe->set_fragment_sampler_textures($self->pipe, + pipe_texture_reference(&$self->fragment_sampler_textures[index], texture); + $self->pipe->set_fragment_sampler_textures($self->pipe, PIPE_MAX_SAMPLERS, - $self->sampler_textures); + $self->fragment_sampler_textures); + } + + void set_vertex_sampler_texture(unsigned index, + struct pipe_texture *texture) { + if(!texture) + texture = $self->default_texture; + pipe_texture_reference(&$self->vertex_sampler_textures[index], texture); + $self->pipe->set_vertex_sampler_textures($self->pipe, + PIPE_MAX_VERTEX_SAMPLERS, + $self->vertex_sampler_textures); } void set_vertex_buffer(unsigned index, diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i index 1de7f86a3c7..761587dc533 100644 --- a/src/gallium/state_trackers/python/p_texture.i +++ b/src/gallium/state_trackers/python/p_texture.i @@ -132,8 +132,8 @@ struct st_surface struct pipe_transfer *transfer; 
unsigned stride; - stride = pf_get_stride(texture->format, w); - *LENGTH = pf_get_nblocksy(texture->format, h) * stride; + stride = util_format_get_stride(texture->format, w); + *LENGTH = util_format_get_nblocksy(texture->format, h) * stride; *STRING = (char *) malloc(*LENGTH); if(!*STRING) return; @@ -159,9 +159,9 @@ struct st_surface struct pipe_transfer *transfer; if(stride == 0) - stride = pf_get_stride(texture->format, w); + stride = util_format_get_stride(texture->format, w); - if(LENGTH < pf_get_nblocksy(texture->format, h) * stride) + if(LENGTH < util_format_get_nblocksy(texture->format, h) * stride) SWIG_exception(SWIG_ValueError, "offset must be smaller than buffer size"); transfer = screen->get_tex_transfer(screen, diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py index 110b3d0ec11..a68709f5cf3 100755 --- a/src/gallium/state_trackers/python/retrace/interpreter.py +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -52,10 +52,10 @@ def make_image(surface, x=None, y=None, w=None, h=None): w = surface.width - x if h is None: h = surface.height - y - data = surface.get_tile_rgba8(0, 0, surface.width, surface.height) + data = surface.get_tile_rgba8(x, y, surface.width, surface.height) import Image - outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1) + outimage = Image.fromstring('RGBA', (w, h), data, "raw", 'RGBA', 0, 1) return outimage def save_image(filename, surface, x=None, y=None, w=None, h=None): @@ -278,9 +278,9 @@ class Screen(Object): def texture_create(self, templat): return self.real.texture_create( format = templat.format, - width = templat.width0, - height = templat.height0, - depth = templat.depth0, + width = templat.width, + height = templat.height, + depth = templat.depth, last_level = templat.last_level, target = templat.target, tex_usage = templat.tex_usage, @@ -387,9 +387,13 @@ class Context(Object): def 
delete_sampler_state(self, state): pass + def bind_vertex_sampler_states(self, num_states, states): + for i in range(num_states): + self.real.set_vertex_sampler(i, states[i]) + def bind_fragment_sampler_states(self, num_states, states): for i in range(num_states): - self.real.set_sampler(i, states[i]) + self.real.set_fragment_sampler(i, states[i]) def create_rasterizer_state(self, state): return state @@ -487,7 +491,11 @@ class Context(Object): def set_fragment_sampler_textures(self, num_textures, textures): for i in range(num_textures): - self.real.set_sampler_texture(i, textures[i]) + self.real.set_fragment_sampler_texture(i, textures[i]) + + def set_vertex_sampler_textures(self, num_textures, textures): + for i in range(num_textures): + self.real.set_vertex_sampler_texture(i, textures[i]) def set_vertex_buffers(self, num_buffers, buffers): self.vbufs = buffers[0:num_buffers] diff --git a/src/gallium/state_trackers/python/samples/gs.py b/src/gallium/state_trackers/python/samples/gs.py new file mode 100644 index 00000000000..1ceead5f17c --- /dev/null +++ b/src/gallium/state_trackers/python/samples/gs.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python +########################################################################## +# +# Copyright 2009 VMware +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +########################################################################## + + +from gallium import * + + +def make_image(surface): + data = surface.get_tile_rgba8(0, 0, surface.width, surface.height) + + import Image + outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1) + return outimage + +def save_image(filename, surface): + outimage = make_image(surface) + outimage.save(filename, "PNG") + +def show_image(surface): + outimage = make_image(surface) + + import Tkinter as tk + from PIL import Image, ImageTk + root = tk.Tk() + + root.title('background image') + + image1 = ImageTk.PhotoImage(outimage) + w = image1.width() + h = image1.height() + x = 100 + y = 100 + root.geometry("%dx%d+%d+%d" % (w, h, x, y)) + panel1 = tk.Label(root, image=image1) + panel1.pack(side='top', fill='both', expand='yes') + panel1.image = image1 + root.mainloop() + + +def test(dev): + ctx = dev.context_create() + + width = 255 + height = 255 + minz = 0.0 + maxz = 1.0 + + # disabled blending/masking + blend = Blend() + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.colormask = PIPE_MASK_RGBA + ctx.set_blend(blend) + + # depth/stencil/alpha + depth_stencil_alpha = DepthStencilAlpha() + depth_stencil_alpha.depth.enabled = 1 + depth_stencil_alpha.depth.writemask = 1 + depth_stencil_alpha.depth.func = PIPE_FUNC_LESS + 
ctx.set_depth_stencil_alpha(depth_stencil_alpha) + + # rasterizer + rasterizer = Rasterizer() + rasterizer.front_winding = PIPE_WINDING_CW + rasterizer.cull_mode = PIPE_WINDING_NONE + rasterizer.scissor = 1 + ctx.set_rasterizer(rasterizer) + + # viewport + viewport = Viewport() + scale = FloatArray(4) + scale[0] = width / 2.0 + scale[1] = -height / 2.0 + scale[2] = (maxz - minz) / 2.0 + scale[3] = 1.0 + viewport.scale = scale + translate = FloatArray(4) + translate[0] = width / 2.0 + translate[1] = height / 2.0 + translate[2] = (maxz - minz) / 2.0 + translate[3] = 0.0 + viewport.translate = translate + ctx.set_viewport(viewport) + + # samplers + sampler = Sampler() + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE + sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.normalized_coords = 1 + ctx.set_sampler(0, sampler) + + # scissor + scissor = Scissor() + scissor.minx = 0 + scissor.miny = 0 + scissor.maxx = width + scissor.maxy = height + ctx.set_scissor(scissor) + + clip = Clip() + clip.nr = 0 + ctx.set_clip(clip) + + # framebuffer + cbuf = dev.texture_create( + PIPE_FORMAT_X8R8G8B8_UNORM, + width, height, + tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + ).get_surface() + zbuf = dev.texture_create( + PIPE_FORMAT_Z16_UNORM, + width, height, + tex_usage=PIPE_TEXTURE_USAGE_DEPTH_STENCIL, + ).get_surface() + fb = Framebuffer() + fb.width = width + fb.height = height + fb.nr_cbufs = 1 + fb.set_cbuf(0, cbuf) + fb.set_zsbuf(zbuf) + ctx.set_framebuffer(fb) + rgba = FloatArray(4); + rgba[0] = 0.0 + rgba[1] = 0.0 + rgba[2] = 0.0 + rgba[3] = 0.0 + ctx.clear(PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, rgba, 1.0, 0xff) + + # vertex shader + vs = Shader(''' + VERT + DCL IN[0], POSITION, CONSTANT + DCL IN[1], COLOR, CONSTANT + DCL OUT[0], POSITION, CONSTANT + DCL OUT[1], 
COLOR, CONSTANT + 0:MOV OUT[0], IN[0] + 1:MOV OUT[1], IN[1] + 2:END + ''') + ctx.set_vertex_shader(vs) + + gs = Shader(''' + GEOM + PROPERTY GS_INPUT_PRIMITIVE TRIANGLES + PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP + DCL IN[][0], POSITION, CONSTANT + DCL IN[][1], COLOR, CONSTANT + DCL OUT[0], POSITION, CONSTANT + DCL OUT[1], COLOR, CONSTANT + 0:MOV OUT[0], IN[0][0] + 1:MOV OUT[1], IN[0][1] + 2:EMIT + 3:MOV OUT[0], IN[1][0] + 4:MOV OUT[1], IN[1][1] + 5:EMIT + 6:MOV OUT[0], IN[2][0] + 7:MOV OUT[1], IN[2][1] + 8:EMIT + 9:ENDPRIM + 10:END + ''') + ctx.set_geometry_shader(gs) + + # fragment shader + fs = Shader(''' + FRAG + DCL IN[0], COLOR, LINEAR + DCL OUT[0], COLOR, CONSTANT + 0:MOV OUT[0], IN[0] + 1:END + ''') + ctx.set_fragment_shader(fs) + + nverts = 3 + nattrs = 2 + verts = FloatArray(nverts * nattrs * 4) + + verts[ 0] = 0.0 # x1 + verts[ 1] = 0.8 # y1 + verts[ 2] = 0.2 # z1 + verts[ 3] = 1.0 # w1 + verts[ 4] = 1.0 # r1 + verts[ 5] = 0.0 # g1 + verts[ 6] = 0.0 # b1 + verts[ 7] = 1.0 # a1 + verts[ 8] = -0.8 # x2 + verts[ 9] = -0.8 # y2 + verts[10] = 0.5 # z2 + verts[11] = 1.0 # w2 + verts[12] = 0.0 # r2 + verts[13] = 1.0 # g2 + verts[14] = 0.0 # b2 + verts[15] = 1.0 # a2 + verts[16] = 0.8 # x3 + verts[17] = -0.8 # y3 + verts[18] = 0.8 # z3 + verts[19] = 1.0 # w3 + verts[20] = 0.0 # r3 + verts[21] = 0.0 # g3 + verts[22] = 1.0 # b3 + verts[23] = 1.0 # a3 + + ctx.draw_vertices(PIPE_PRIM_TRIANGLES, + nverts, + nattrs, + verts) + + ctx.flush() + + show_image(cbuf) + #show_image(zbuf) + #save_image('cbuf.png', cbuf) + #save_image('zbuf.png', zbuf) + + + +def main(): + dev = Device() + test(dev) + + +if __name__ == '__main__': + main() diff --git a/src/gallium/state_trackers/python/samples/tri.py b/src/gallium/state_trackers/python/samples/tri.py index 87acf60366d..af80426dc6c 100644 --- a/src/gallium/state_trackers/python/samples/tri.py +++ b/src/gallium/state_trackers/python/samples/tri.py @@ -118,7 +118,7 @@ def test(dev): sampler.min_img_filter = 
PIPE_TEX_MIPFILTER_NEAREST sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.normalized_coords = 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # scissor scissor = Scissor() diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index 10c7ecbd78f..d144af2447d 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -135,7 +135,9 @@ st_context_destroy(struct st_context *st_ctx) st_ctx->pipe->destroy(st_ctx->pipe); for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) - pipe_texture_reference(&st_ctx->sampler_textures[i], NULL); + pipe_texture_reference(&st_ctx->fragment_sampler_textures[i], NULL); + for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + pipe_texture_reference(&st_ctx->vertex_sampler_textures[i], NULL); pipe_texture_reference(&st_ctx->default_texture, NULL); FREE(st_ctx); @@ -276,9 +278,12 @@ st_context_create(struct st_device *st_dev) } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - pipe_texture_reference(&st_ctx->sampler_textures[i], st_ctx->default_texture); + pipe_texture_reference(&st_ctx->fragment_sampler_textures[i], st_ctx->default_texture); + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + pipe_texture_reference(&st_ctx->vertex_sampler_textures[i], st_ctx->default_texture); - cso_set_sampler_textures(st_ctx->cso, PIPE_MAX_SAMPLERS, st_ctx->sampler_textures); + cso_set_sampler_textures(st_ctx->cso, PIPE_MAX_SAMPLERS, st_ctx->fragment_sampler_textures); + cso_set_vertex_sampler_textures(st_ctx->cso, PIPE_MAX_VERTEX_SAMPLERS, st_ctx->vertex_sampler_textures); } /* vertex shader */ diff --git a/src/gallium/state_trackers/python/st_device.h b/src/gallium/state_trackers/python/st_device.h index a246b6a1f25..f786e134118 100644 --- a/src/gallium/state_trackers/python/st_device.h +++ b/src/gallium/state_trackers/python/st_device.h @@ -57,9 +57,11 @@ struct st_context { void *vs; void *fs; + void *gs; struct pipe_texture 
*default_texture; - struct pipe_texture *sampler_textures[PIPE_MAX_SAMPLERS]; + struct pipe_texture *fragment_sampler_textures[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_sampler_textures[PIPE_MAX_VERTEX_SAMPLERS]; unsigned num_vertex_buffers; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py index 35673b3ec92..eed6cdd1e64 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py @@ -96,7 +96,7 @@ def test(dev, name): sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.normalized_coords = 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # scissor scissor = Scissor() diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py index 5be1ca80f30..41bebd0604a 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py @@ -96,7 +96,7 @@ def test(dev, name): sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST sampler.normalized_coords = 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # scissor scissor = Scissor() diff --git a/src/gallium/state_trackers/python/tests/texture_render.py b/src/gallium/state_trackers/python/tests/texture_render.py index 8a2db9dbcff..79287f2cace 100755 --- a/src/gallium/state_trackers/python/tests/texture_render.py +++ b/src/gallium/state_trackers/python/tests/texture_render.py @@ -144,8 +144,8 @@ class TextureTest(TestCase): 
sampler.normalized_coords = 1 sampler.min_lod = 0 sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 - ctx.set_sampler(0, sampler) - ctx.set_sampler_texture(0, src_texture) + ctx.set_fragment_sampler(0, sampler) + ctx.set_fragment_sampler_texture(0, src_texture) # framebuffer cbuf_tex = dev.texture_create( diff --git a/src/gallium/state_trackers/python/tests/texture_sample.py b/src/gallium/state_trackers/python/tests/texture_sample.py index 92a6c4dfb9f..520961c8051 100755 --- a/src/gallium/state_trackers/python/tests/texture_sample.py +++ b/src/gallium/state_trackers/python/tests/texture_sample.py @@ -169,7 +169,7 @@ class TextureColorSampleTest(TestCase): sampler.normalized_coords = 1 sampler.min_lod = 0 sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # texture texture = dev.texture_create( @@ -189,7 +189,7 @@ class TextureColorSampleTest(TestCase): zslice = zslice, ).sample_rgba(expected_rgba) - ctx.set_sampler_texture(0, texture) + ctx.set_fragment_sampler_texture(0, texture) # framebuffer cbuf_tex = dev.texture_create( @@ -359,7 +359,7 @@ class TextureDepthSampleTest(TestCase): sampler.normalized_coords = 1 sampler.min_lod = 0 sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 - ctx.set_sampler(0, sampler) + ctx.set_fragment_sampler(0, sampler) # texture texture = dev.texture_create( @@ -379,7 +379,7 @@ class TextureDepthSampleTest(TestCase): zslice = zslice, ).sample_rgba(expected_rgba) - ctx.set_sampler_texture(0, texture) + ctx.set_fragment_sampler_texture(0, texture) # framebuffer cbuf_tex = dev.texture_create( diff --git a/src/gallium/state_trackers/vega/Makefile b/src/gallium/state_trackers/vega/Makefile index b8c805b06c4..fc97bf51f8f 100644 --- a/src/gallium/state_trackers/vega/Makefile +++ b/src/gallium/state_trackers/vega/Makefile @@ -61,14 +61,7 @@ VG_MINOR = 0 VG_TINY = 0 GALLIUM_LIBS = \ - $(GALLIUM)/src/gallium/auxiliary/pipebuffer/libpipebuffer.a \ - 
$(GALLIUM)/src/gallium/auxiliary/sct/libsct.a \ - $(GALLIUM)/src/gallium/auxiliary/draw/libdraw.a \ - $(GALLIUM)/src/gallium/auxiliary/rtasm/librtasm.a \ - $(GALLIUM)/src/gallium/auxiliary/translate/libtranslate.a \ - $(GALLIUM)/src/gallium/auxiliary/cso_cache/libcso_cache.a \ - $(GALLIUM)/src/gallium/auxiliary/util/libutil.a \ - $(GALLIUM)/src/gallium/auxiliary/tgsi/libtgsi.a + $(GALLIUM)/src/gallium/auxiliary/libgallium.a .SUFFIXES : .cpp diff --git a/src/gallium/state_trackers/vega/api_path.c b/src/gallium/state_trackers/vega/api_path.c index a6b7a2bb93a..15ac1900f4b 100644 --- a/src/gallium/state_trackers/vega/api_path.c +++ b/src/gallium/state_trackers/vega/api_path.c @@ -164,8 +164,7 @@ void vgAppendPathData(VGPath dstPath, return; } for (i = 0; i < numSegments; ++i) { - if (pathSegments[i] < VG_CLOSE_PATH || - pathSegments[i] > VG_LCWARC_TO_REL) { + if (pathSegments[i] > VG_LCWARC_TO_REL) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } diff --git a/src/gallium/state_trackers/vega/arc.c b/src/gallium/state_trackers/vega/arc.c index 8b04d21ea76..2d123408702 100644 --- a/src/gallium/state_trackers/vega/arc.c +++ b/src/gallium/state_trackers/vega/arc.c @@ -528,7 +528,6 @@ static INLINE int num_beziers_needed(struct arc *arc) double threshold = 0.05; VGboolean found = VG_FALSE; int n = 1; - int i; double min_eta, max_eta; min_eta = MIN2(arc->eta1, arc->eta2); @@ -538,6 +537,7 @@ static INLINE int num_beziers_needed(struct arc *arc) double d_eta = (max_eta - min_eta) / n; if (d_eta <= 0.5 * M_PI) { double eta_b = min_eta; + int i; found = VG_TRUE; for (i = 0; found && (i < n); ++i) { double etaA = eta_b; diff --git a/src/gallium/state_trackers/vega/bezier.c b/src/gallium/state_trackers/vega/bezier.c index 0d5504004cc..5769e8ea868 100644 --- a/src/gallium/state_trackers/vega/bezier.c +++ b/src/gallium/state_trackers/vega/bezier.c @@ -256,7 +256,6 @@ static enum shift_result good_offset(const struct bezier *b1, const float max_dist_normal = 
threshold*offset; const float spacing = 0.25; float i; - for (i = spacing; i < 0.99; i += spacing) { float p1[2],p2[2], d, l; float normal[2]; diff --git a/src/gallium/state_trackers/vega/stroker.c b/src/gallium/state_trackers/vega/stroker.c index 1b92d2b5c62..68a52029db0 100644 --- a/src/gallium/state_trackers/vega/stroker.c +++ b/src/gallium/state_trackers/vega/stroker.c @@ -476,7 +476,7 @@ static enum intersection_type line_intersect(const VGfloat *l1, const VGfloat *l2, float *intersection_point) { - VGfloat isect[2]; + VGfloat isect[2] = { 0 }; enum intersection_type type; VGboolean dx_zero, ldx_zero; @@ -649,7 +649,7 @@ static void create_joins(struct stroker *stroker, VGfloat prev_line[] = {stroker->back2_x, stroker->back2_y, stroker->back1_x, stroker->back1_y}; - VGfloat isect[2]; + VGfloat isect[2] = { 0 }; enum intersection_type type = line_intersect(prev_line, next_line, isect); if (join == SquareJoin) { diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript index b05944a33b3..352c087475e 100644 --- a/src/gallium/state_trackers/wgl/SConscript +++ b/src/gallium/state_trackers/wgl/SConscript @@ -11,10 +11,11 @@ if env['platform'] in ['windows']: '.', ]) - env.Append(CPPDEFINES = [ + env.AppendUnique(CPPDEFINES = [ '_GDI32_', # prevent wgl* being declared __declspec(dllimport) 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers 'WIN32_THREADS', # use Win32 thread API + 'WIN32_LEAN_AND_MEAN', # http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx ]) sources = [ diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index c776faa53f8..650d2c0d1db 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -71,6 +71,8 @@ struct crtc_private static void crtc_dpms(xf86CrtcPtr crtc, int mode) { + /* ScrnInfoPtr pScrn = crtc->scrn; */ + switch (mode) { case DPMSModeOn: case DPMSModeStandby: @@ -121,7 
+123,8 @@ crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, drm_mode.vrefresh = mode->VRefresh; if (!mode->name) xf86SetModeDefaultName(mode); - strncpy(drm_mode.name, mode->name, DRM_DISPLAY_MODE_LEN); + strncpy(drm_mode.name, mode->name, DRM_DISPLAY_MODE_LEN - 1); + drm_mode.name[DRM_DISPLAY_MODE_LEN - 1] = '\0'; ret = drmModeSetCrtc(ms->fd, drm_crtc->crtc_id, ms->fb_id, x, y, &drm_connector->connector_id, 1, &drm_mode); @@ -147,18 +150,23 @@ crtc_gamma_set(xf86CrtcPtr crtc, CARD16 * red, CARD16 * green, CARD16 * blue, static void * crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height) { + /* ScrnInfoPtr pScrn = crtc->scrn; */ + return NULL; } static PixmapPtr crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height) { + /* ScrnInfoPtr pScrn = crtc->scrn; */ + return NULL; } static void crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr rotate_pixmap, void *data) { + /* ScrnInfoPtr pScrn = crtc->scrn; */ } /* diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index 8a24aa10a3c..b02fe68f313 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -181,8 +181,7 @@ drv_crtc_resize(ScrnInfoPtr pScrn, int width, int height) if (!pScreen->ModifyPixmapHeader(rootPixmap, width, height, -1, -1, -1, NULL)) return FALSE; - /* HW dependent - FIXME */ - pScrn->displayWidth = pScrn->virtualX; + pScrn->displayWidth = rootPixmap->devKind / (rootPixmap->drawable.bitsPerPixel / 8); /* now create new frontbuffer */ return ms->create_front_buffer(pScrn) && ms->bind_front_buffer(pScrn); @@ -220,6 +219,12 @@ static Bool drv_init_resource_management(ScrnInfoPtr pScrn) { modesettingPtr ms = modesettingPTR(pScrn); + /* + ScreenPtr pScreen = pScrn->pScreen; + PixmapPtr rootPixmap = pScreen->GetScreenPixmap(pScreen); + Bool fbAccessDisabled; + CARD8 *fbstart; + */ if (ms->screen || ms->kms) return TRUE; @@ -249,9 +254,19 @@ static Bool 
drv_close_resource_management(ScrnInfoPtr pScrn) { modesettingPtr ms = modesettingPTR(pScrn); + int i; - if (ms->screen) + if (ms->screen) { + assert(ms->ctx == NULL); + + for (i = 0; i < XORG_NR_FENCES; i++) { + if (ms->fence[i]) { + ms->screen->fence_finish(ms->screen, ms->fence[i], 0); + ms->screen->fence_reference(ms->screen, &ms->fence[i], NULL); + } + } ms->screen->destroy(ms->screen); + } ms->screen = NULL; if (ms->api && ms->api->destroy) @@ -461,7 +476,7 @@ static void drv_block_handler(int i, pointer blockData, pointer pTimeout, * quite small. Let us get a fair way ahead of hardware before * throttling. */ - for (j = 0; j < XORG_NR_FENCES; j++) + for (j = 0; j < XORG_NR_FENCES - 1; j++) ms->screen->fence_reference(ms->screen, &ms->fence[j], ms->fence[j+1]); @@ -915,6 +930,12 @@ drv_destroy_front_buffer_kms(ScrnInfoPtr pScrn) ScreenPtr pScreen = pScrn->pScreen; PixmapPtr rootPixmap = pScreen->GetScreenPixmap(pScreen); + /* XXX Do something with the rootPixmap. + * This currently works fine but if we are getting crashes in + * the fb functions after VT switches maybe look more into it. + */ + (void)rootPixmap; + if (!ms->root_bo) return TRUE; diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index d5c005ebadd..aa68570b9c0 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -516,6 +516,7 @@ ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, #endif debug_assert(priv == exa->copy.dst); + (void) priv; if (exa->copy.use_surface_copy) { /* XXX: consider exposing >1 box in surface_copy interface. 
@@ -1019,6 +1020,9 @@ xorg_exa_close(ScrnInfoPtr pScrn) if (exa->pipe) exa->pipe->destroy(exa->pipe); + exa->pipe = NULL; + /* Since this was shared be proper with the pointer */ + ms->ctx = NULL; exaDriverFini(pScrn->pScreen); xfree(exa); diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c index 89b794a09ac..bed17caab77 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c @@ -47,22 +47,22 @@ static void print_fs_traits(int fs_traits) { const char *strings[] = { - "FS_COMPOSITE", /* = 1 << 0 */ - "FS_MASK", /* = 1 << 1 */ - "FS_SOLID_FILL", /* = 1 << 2 */ - "FS_LINGRAD_FILL", /* = 1 << 3 */ - "FS_RADGRAD_FILL", /* = 1 << 4 */ - "FS_CA_FULL", /* = 1 << 5 - src.rgba * mask.rgba */ - "FS_CA_SRCALPHA", /* = 1 << 6 - src.aaaa * mask.rgba */ - "FS_YUV", /* = 1 << 7 */ - "FS_SRC_REPEAT_NONE", /* = 1 << 8 */ - "FS_MASK_REPEAT_NONE",/* = 1 << 9 */ - "FS_SRC_SWIZZLE_RGB", /* = 1 << 10 */ - "FS_MASK_SWIZZLE_RGB",/* = 1 << 11 */ - "FS_SRC_SET_ALPHA", /* = 1 << 12 */ - "FS_MASK_SET_ALPHA", /* = 1 << 13 */ - "FS_SRC_LUMINANCE", /* = 1 << 14 */ - "FS_MASK_LUMINANCE", /* = 1 << 15 */ + "FS_COMPOSITE", /* = 1 << 0, */ + "FS_MASK", /* = 1 << 1, */ + "FS_SOLID_FILL", /* = 1 << 2, */ + "FS_LINGRAD_FILL", /* = 1 << 3, */ + "FS_RADGRAD_FILL", /* = 1 << 4, */ + "FS_CA_FULL", /* = 1 << 5, */ /* src.rgba * mask.rgba */ + "FS_CA_SRCALPHA", /* = 1 << 6, */ /* src.aaaa * mask.rgba */ + "FS_YUV", /* = 1 << 7, */ + "FS_SRC_REPEAT_NONE", /* = 1 << 8, */ + "FS_MASK_REPEAT_NONE",/* = 1 << 9, */ + "FS_SRC_SWIZZLE_RGB", /* = 1 << 10, */ + "FS_MASK_SWIZZLE_RGB",/* = 1 << 11, */ + "FS_SRC_SET_ALPHA", /* = 1 << 12, */ + "FS_MASK_SET_ALPHA", /* = 1 << 13, */ + "FS_SRC_LUMINANCE", /* = 1 << 14, */ + "FS_MASK_LUMINANCE", /* = 1 << 15, */ }; int i, k; debug_printf("%s: ", __func__); @@ -492,6 +492,7 @@ create_fs(struct pipe_context *pipe, /* it has to be either a fill, a composite op or 
a yuv conversion */ debug_assert((is_fill ^ is_composite) ^ is_yuv); + (void) is_yuv; out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index ba15f8a7845..d80f341e6c2 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -438,6 +438,7 @@ void renderer_copy_prepare(struct xorg_renderer *r, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); + (void) screen; /* set misc state we care about */ diff --git a/src/gallium/winsys/drm/i965/dri/SConscript b/src/gallium/winsys/drm/i965/dri/SConscript index 233ef464be5..a99533fd245 100644 --- a/src/gallium/winsys/drm/i965/dri/SConscript +++ b/src/gallium/winsys/drm/i965/dri/SConscript @@ -14,6 +14,6 @@ drivers = [ env.LoadableModule( target ='i965_dri.so', source = COMMON_GALLIUM_SOURCES, - LIBS = drivers + mesa + auxiliaries + env['LIBS'], + LIBS = drivers + mesa + gallium + env['LIBS'], SHLIBPREFIX = '', ) diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript index b1b654d9f8b..104e987083f 100644 --- a/src/gallium/winsys/drm/intel/dri/SConscript +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -15,6 +15,6 @@ drivers = [ env.LoadableModule( target ='i915_dri.so', source = COMMON_GALLIUM_SOURCES, - LIBS = drivers + mesa + auxiliaries + env['LIBS'], + LIBS = drivers + mesa + gallium + env['LIBS'], SHLIBPREFIX = '', ) diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index dec7c065036..05194fc52a2 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -206,7 +206,7 @@ static boolean radeon_shared_handle_from_texture(struct drm_api *api, int retval, fd; struct drm_gem_flink flink; struct radeon_pipe_buffer* radeon_buffer; - struct pipe_buffer *buffer; + struct pipe_buffer *buffer 
= NULL; if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { return FALSE; @@ -239,7 +239,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api, unsigned *stride, unsigned *handle) { - struct pipe_buffer *buffer; + struct pipe_buffer *buffer = NULL; if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { return FALSE; } diff --git a/src/gallium/winsys/drm/radeon/dri/SConscript b/src/gallium/winsys/drm/radeon/dri/SConscript index aea987a3aca..c4989d1b595 100644 --- a/src/gallium/winsys/drm/radeon/dri/SConscript +++ b/src/gallium/winsys/drm/radeon/dri/SConscript @@ -13,5 +13,5 @@ drivers = [ env.SharedLibrary( target ='radeon_dri.so', source = COMMON_GALLIUM_SOURCES, - LIBS = st_dri + radeonwinsys + mesa + drivers + auxiliaries + env['LIBS'], + LIBS = st_dri + radeonwinsys + mesa + drivers + gallium + env['LIBS'], ) diff --git a/src/gallium/winsys/drm/radeon/python/SConscript b/src/gallium/winsys/drm/radeon/python/SConscript index 3200fd8d1b0..91cae986975 100644 --- a/src/gallium/winsys/drm/radeon/python/SConscript +++ b/src/gallium/winsys/drm/radeon/python/SConscript @@ -29,5 +29,5 @@ if env['platform'] == 'linux': env.SharedLibrary( target ='_gallium', source = sources, - LIBS = [pyst] + drivers + auxiliaries + env['LIBS'], + LIBS = [pyst] + drivers + gallium + env['LIBS'], ) diff --git a/src/gallium/winsys/drm/radeon/xorg/Makefile b/src/gallium/winsys/drm/radeon/xorg/Makefile index 9fa16dab24c..0eb1b3988f3 100644 --- a/src/gallium/winsys/drm/radeon/xorg/Makefile +++ b/src/gallium/winsys/drm/radeon/xorg/Makefile @@ -1,11 +1,16 @@ -TARGET = modesetting_drv.so -CFILES = $(wildcard ./*.c) -OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) -GALLIUMDIR = ../../../.. TOP = ../../../../../.. 
+ +GALLIUMDIR = $(TOP)/src/gallium + +TARGET = radeong_drv.so + +CFILES = $(wildcard ./*.c) + include ${TOP}/configs/current +OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) + CFLAGS = -DHAVE_CONFIG_H \ -g -Wall -Wimplicit-function-declaration -fPIC \ $(shell pkg-config --cflags pixman-1 xorg-server libdrm xproto) \ @@ -24,16 +29,21 @@ LIBS = \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) +TARGET_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET) ############################################# +all default: $(TARGET) $(TARGET_STAGING) - -all default: $(TARGET) - -$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a +$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a $(LIBS) $(TOP)/bin/mklib -noprefix -o $@ \ $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_radeon +$(TOP)/$(LIB_DIR)/gallium: + mkdir -p $@ + +$(TARGET_STAGING): $(TARGET) $(TOP)/$(LIB_DIR)/gallium + $(INSTALL) $(TARGET) $(TOP)/$(LIB_DIR)/gallium + clean: rm -rf $(OBJECTS) $(TARGET) diff --git a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c index 837f2aa8fec..bb76cc03499 100644 --- a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c +++ b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c @@ -53,7 +53,7 @@ static PciChipsets radeon_xorg_pci_devices[] = { }; static XF86ModuleVersionInfo radeon_xorg_version = { - "modesetting", + "radeong", MODULEVENDORSTRING, MODINFOSTRING1, MODINFOSTRING2, @@ -69,9 +69,9 @@ static XF86ModuleVersionInfo radeon_xorg_version = { * Xorg driver exported structures */ -_X_EXPORT DriverRec modesetting = { +_X_EXPORT DriverRec radeong = { 1, - "modesetting", + "radeong", radeon_xorg_identify, NULL, xorg_tracker_available_options, @@ -84,7 +84,7 @@ _X_EXPORT DriverRec modesetting = { static MODULESETUPPROTO(radeon_xorg_setup); -_X_EXPORT XF86ModuleData modesettingModuleData = { +_X_EXPORT XF86ModuleData radeongModuleData = { 
&radeon_xorg_version, radeon_xorg_setup, NULL @@ -103,7 +103,7 @@ radeon_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) */ if (!setupDone) { setupDone = 1; - xf86AddDriver(&modesetting, module, HaveDriverFuncs); + xf86AddDriver(&radeong, module, HaveDriverFuncs); /* * The return value must be non-NULL on success even though there @@ -120,7 +120,7 @@ radeon_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) static void radeon_xorg_identify(int flags) { - xf86PrintChipsets("modesetting", "Driver for Modesetting Kernel Drivers", + xf86PrintChipsets("radeong", "Driver for Radeon Gallium with KMS", radeon_xorg_chipsets); } @@ -135,8 +135,8 @@ radeon_xorg_pci_probe(DriverPtr driver, NULL, NULL, NULL, NULL, NULL); if (scrn != NULL) { scrn->driverVersion = 1; - scrn->driverName = "radeon"; - scrn->name = "modesetting"; + scrn->driverName = "radeong"; + scrn->name = "radeong"; scrn->Probe = NULL; entity = xf86GetEntityInfo(entity_num); diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript index 1019f577a5f..84319f91ff1 100644 --- a/src/gallium/winsys/drm/vmware/dri/SConscript +++ b/src/gallium/winsys/drm/vmware/dri/SConscript @@ -48,7 +48,7 @@ if env['platform'] == 'linux': svgadrm, svga, mesa, - auxiliaries, + gallium, ]) # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions diff --git a/src/gallium/winsys/drm/vmware/xorg/SConscript b/src/gallium/winsys/drm/vmware/xorg/SConscript index b8968e7137b..1e5d8ff7fed 100644 --- a/src/gallium/winsys/drm/vmware/xorg/SConscript +++ b/src/gallium/winsys/drm/vmware/xorg/SConscript @@ -38,12 +38,13 @@ if env['platform'] == 'linux': st_xorg, svgadrm, svga, - auxiliaries, + gallium, ]) sources = [ 'vmw_ioctl.c', 'vmw_screen.c', + 'vmw_video.c', 'vmw_xorg.c', ] diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile index 2997f6b79ce..3965bd949f4 100644 --- 
a/src/gallium/winsys/g3dvl/nouveau/Makefile +++ b/src/gallium/winsys/g3dvl/nouveau/Makefile @@ -19,11 +19,7 @@ CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \ LDFLAGS += -L${DRMDIR}/lib \ -L${DRIDIR}/lib \ -L${GALLIUMDIR}/winsys/drm/nouveau/common \ - -L${GALLIUMDIR}/auxiliary/draw \ - -L${GALLIUMDIR}/auxiliary/tgsi \ - -L${GALLIUMDIR}/auxiliary/translate \ - -L${GALLIUMDIR}/auxiliary/rtasm \ - -L${GALLIUMDIR}/auxiliary/cso_cache \ + -L${GALLIUMDIR}/auxiliary \ -L${GALLIUMDIR}/drivers/nv04 \ -L${GALLIUMDIR}/drivers/nv10 \ -L${GALLIUMDIR}/drivers/nv20 \ @@ -31,7 +27,7 @@ LDFLAGS += -L${DRMDIR}/lib \ -L${GALLIUMDIR}/drivers/nv40 \ -L${GALLIUMDIR}/drivers/nv50 -LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm +LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -lgallium -lm ############################################# diff --git a/src/gallium/winsys/g3dvl/xlib/Makefile b/src/gallium/winsys/g3dvl/xlib/Makefile index cf765ef51a5..9877660a276 100644 --- a/src/gallium/winsys/g3dvl/xlib/Makefile +++ b/src/gallium/winsys/g3dvl/xlib/Makefile @@ -25,13 +25,7 @@ SOURCES = xsp_winsys.c OBJECTS = $(SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o LIBS = $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/auxiliary/vl/libvl.a \ - $(TOP)/src/gallium/auxiliary/tgsi/libtgsi.a \ - $(TOP)/src/gallium/auxiliary/draw/libdraw.a \ - $(TOP)/src/gallium/auxiliary/translate/libtranslate.a \ - $(TOP)/src/gallium/auxiliary/cso_cache/libcso_cache.a \ - $(TOP)/src/gallium/auxiliary/rtasm/librtasm.a \ - $(TOP)/src/gallium/auxiliary/util/libutil.a + $(TOP)/src/gallium/auxiliary/libgallium.a .c.o: $(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript index 74f6b2fd475..4cbc86f3311 100644 --- a/src/gallium/winsys/gdi/SConscript +++ 
b/src/gallium/winsys/gdi/SConscript @@ -42,8 +42,10 @@ if env['platform'] == 'windows': drivers += [trace] + env['no_import_lib'] = 1 + env.SharedLibrary( target ='opengl32', source = sources, - LIBS = wgl + glapi + mesa + drivers + auxiliaries + glsl + env['LIBS'], + LIBS = wgl + glapi + mesa + drivers + gallium + glsl + env['LIBS'], ) diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index a0293fe9b4b..9482e8f9b11 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -23,17 +23,14 @@ INCLUDE_DIRS = \ -I$(TOP)/src/gallium/auxiliary DEFINES += \ - -DGALLIUM_SOFTPIPE \ - -DGALLIUM_TRACE \ - -DGALLIUM_BRW + -DGALLIUM_SOFTPIPE #-DGALLIUM_CELL will be defined by the config */ XLIB_WINSYS_SOURCES = \ xlib.c \ xlib_cell.c \ xlib_llvmpipe.c \ - xlib_softpipe.c \ - xlib_trace.c + xlib_softpipe.c XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o) diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index dfe550f733b..713841aeb1b 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -22,7 +22,7 @@ if env['platform'] == 'linux' \ 'xlib.c', ] - drivers = [] + drivers = [trace] if 'softpipe' in env['drivers']: env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') @@ -42,16 +42,11 @@ if env['platform'] == 'linux' \ sources += ['xlib_cell.c'] drivers += [cell] - if 'trace' in env['drivers']: - env.Append(CPPDEFINES = 'GALLIUM_TRACE') - sources += ['xlib_trace.c'] - drivers += [trace] - # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions libgl = env.SharedLibrary( target ='GL', source = sources, - LIBS = st_xlib + glapi + mesa + drivers + auxiliaries + env['LIBS'], + LIBS = st_xlib + glapi + mesa + glsl + drivers + gallium + env['LIBS'], ) env.InstallSharedLibrary(libgl, version=(1, 5)) diff --git a/src/gallium/winsys/xlib/xlib.c b/src/gallium/winsys/xlib/xlib.c index 163cc8863cb..6dbe05f193e 100644 --- 
a/src/gallium/winsys/xlib/xlib.c +++ b/src/gallium/winsys/xlib/xlib.c @@ -42,7 +42,6 @@ */ enum mode { - MODE_TRACE, MODE_CELL, MODE_LLVMPIPE, MODE_SOFTPIPE @@ -51,9 +50,6 @@ enum mode { static enum mode get_mode() { - if (getenv("XMESA_TRACE")) - return MODE_TRACE; - #ifdef GALLIUM_CELL if (!getenv("GALLIUM_NOCELL")) return MODE_CELL; @@ -73,11 +69,6 @@ static void _init( void ) enum mode xlib_mode = get_mode(); switch (xlib_mode) { - case MODE_TRACE: -#if defined(GALLIUM_TRACE) && defined(GALLIUM_SOFTPIPE) - xmesa_set_driver( &xlib_trace_driver ); -#endif - break; case MODE_CELL: #if defined(GALLIUM_CELL) xmesa_set_driver( &xlib_cell_driver ); diff --git a/src/gallium/winsys/xlib/xlib.h b/src/gallium/winsys/xlib/xlib.h index f0855035f77..8e091d0c084 100644 --- a/src/gallium/winsys/xlib/xlib.h +++ b/src/gallium/winsys/xlib/xlib.h @@ -5,7 +5,6 @@ #include "pipe/p_compiler.h" #include "xm_winsys.h" -extern struct xm_driver xlib_trace_driver; extern struct xm_driver xlib_softpipe_driver; extern struct xm_driver xlib_llvmpipe_driver; extern struct xm_driver xlib_cell_driver; diff --git a/src/gallium/winsys/xlib/xlib_brw_context.c b/src/gallium/winsys/xlib/xlib_brw_context.c new file mode 100644 index 00000000000..fc9addd09e3 --- /dev/null +++ b/src/gallium/winsys/xlib/xlib_brw_context.c @@ -0,0 +1,209 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +/* #include "glxheader.h" */ +/* #include "xmesaP.h" */ + +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "i965simple/brw_winsys.h" +#include "xlib_brw_aub.h" +#include "xlib_brw.h" + + + + +#define XBCWS_BATCHBUFFER_SIZE 1024 + + +/* The backend to the brw driver (ie struct brw_winsys) is actually a + * per-context entity. 
+ */ +struct xlib_brw_context_winsys { + struct brw_winsys brw_context_winsys; /**< batch buffer funcs */ + struct aub_context *aub; + + struct pipe_winsys *pipe_winsys; + + unsigned batch_data[XBCWS_BATCHBUFFER_SIZE]; + unsigned batch_nr; + unsigned batch_size; + unsigned batch_alloc; +}; + + +/* Turn a brw_winsys into an xlib_brw_context_winsys: + */ +static inline struct xlib_brw_context_winsys * +xlib_brw_context_winsys( struct brw_winsys *sws ) +{ + return (struct xlib_brw_context_winsys *)sws; +} + + +/* Simple batchbuffer interface: + */ + +static unsigned *xbcws_batch_start( struct brw_winsys *sws, + unsigned dwords, + unsigned relocs ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + if (xbcws->batch_size < xbcws->batch_nr + dwords) + return NULL; + + xbcws->batch_alloc = xbcws->batch_nr + dwords; + return (void *)1; /* not a valid pointer! */ +} + +static void xbcws_batch_dword( struct brw_winsys *sws, + unsigned dword ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + assert(xbcws->batch_nr < xbcws->batch_alloc); + xbcws->batch_data[xbcws->batch_nr++] = dword; +} + +static void xbcws_batch_reloc( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + assert(xbcws->batch_nr < xbcws->batch_alloc); + xbcws->batch_data[xbcws->batch_nr++] = + ( xlib_brw_get_buffer_offset( NULL, buf, access_flags ) + + delta ); +} + +static void xbcws_batch_end( struct brw_winsys *sws ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + assert(xbcws->batch_nr <= xbcws->batch_alloc); + xbcws->batch_alloc = 0; +} + +static void xbcws_batch_flush( struct brw_winsys *sws, + struct pipe_fence_handle **fence ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + assert(xbcws->batch_nr <= xbcws->batch_size); + + if (xbcws->batch_nr) { + 
xlib_brw_commands_aub( xbcws->pipe_winsys, + xbcws->batch_data, + xbcws->batch_nr ); + } + + xbcws->batch_nr = 0; +} + + + +/* Really a per-device function, just pass through: + */ +static unsigned xbcws_get_buffer_offset( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + return xlib_brw_get_buffer_offset( xbcws->pipe_winsys, + buf, + access_flags ); +} + + +/* Really a per-device function, just pass through: + */ +static void xbcws_buffer_subdata_typed( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + xlib_brw_buffer_subdata_typed( xbcws->pipe_winsys, + buf, + offset, + size, + data, + data_type ); +} + + +/** + * Create i965 hardware rendering context, but plugged into a + * dump-to-aubfile backend. + */ +struct pipe_context * +xlib_create_brw_context( struct pipe_screen *screen, + void *unused ) +{ + struct xlib_brw_context_winsys *xbcws = CALLOC_STRUCT( xlib_brw_context_winsys ); + + /* Fill in this struct with callbacks that i965simple will need to + * communicate with the window system, buffer manager, etc. 
+ */ + xbcws->brw_context_winsys.batch_start = xbcws_batch_start; + xbcws->brw_context_winsys.batch_dword = xbcws_batch_dword; + xbcws->brw_context_winsys.batch_reloc = xbcws_batch_reloc; + xbcws->brw_context_winsys.batch_end = xbcws_batch_end; + xbcws->brw_context_winsys.batch_flush = xbcws_batch_flush; + xbcws->brw_context_winsys.buffer_subdata_typed = xbcws_buffer_subdata_typed; + xbcws->brw_context_winsys.get_buffer_offset = xbcws_get_buffer_offset; + + xbcws->pipe_winsys = screen->winsys; /* redundant */ + + xbcws->batch_size = XBCWS_BATCHBUFFER_SIZE; + + /* Create the i965simple context: + */ +#ifdef GALLIUM_CELL + return NULL; +#else + return brw_create( screen, + &xbcws->brw_context_winsys, + 0 ); +#endif +} diff --git a/src/gallium/winsys/xlib/xlib_trace.c b/src/gallium/winsys/xlib/xlib_trace.c deleted file mode 100644 index dbea655ab45..00000000000 --- a/src/gallium/winsys/xlib/xlib_trace.c +++ /dev/null @@ -1,113 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ - -/* - * Authors: - * Keith Whitwell - * Brian Paul - */ - - -#include "xlib.h" - -#include "trace/tr_screen.h" -#include "trace/tr_context.h" -#include "trace/tr_texture.h" - -#include "pipe/p_screen.h" - - - -static struct pipe_screen * -xlib_create_trace_screen( void ) -{ - struct pipe_screen *screen, *trace_screen; - - screen = xlib_softpipe_driver.create_pipe_screen(); - if (screen == NULL) - goto fail; - - /* Wrap it: - */ - trace_screen = trace_screen_create(screen); - if (trace_screen == NULL) - goto fail; - - return trace_screen; - -fail: - if (screen) - screen->destroy( screen ); - return NULL; -} - -static struct pipe_context * -xlib_create_trace_context( struct pipe_screen *_screen, - void *priv ) -{ - struct trace_screen *tr_scr = trace_screen( _screen ); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_context *pipe, *trace_pipe; - - pipe = xlib_softpipe_driver.create_pipe_context( screen, priv ); - if (pipe == NULL) - goto fail; - - /* Wrap it: - */ - trace_pipe = trace_context_create(_screen, pipe); - if (trace_pipe == NULL) - goto fail; - - trace_pipe->priv = priv; - - return trace_pipe; - -fail: - if (pipe) - pipe->destroy( pipe ); - return NULL; -} - -static void -xlib_trace_display_surface( struct xmesa_buffer *buffer, - struct pipe_surface *_surf ) -{ - struct trace_surface *tr_surf = trace_surface( _surf ); - struct pipe_surface *surf = tr_surf->surface; - - 
xlib_softpipe_driver.display_surface( buffer, surf ); -} - - -struct xm_driver xlib_trace_driver = -{ - .create_pipe_screen = xlib_create_trace_screen, - .create_pipe_context = xlib_create_trace_context, - .display_surface = xlib_trace_display_surface, -}; |